Subversion Repositories DevTools

Rev

Rev 5282 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
5282 dpurdie 1
#!/usr/bin/env perl
2
# cloc -- Count Lines of Code                  {{{1
3
# Copyright (C) 2006-2015 Al Danial <al.danial@gmail.com>
4
# First release August 2006
5
#
6
# Includes code from:
7
#   - SLOCCount v2.26 
8
#     http://www.dwheeler.com/sloccount/
9
#     by David Wheeler.
10
#   - Regexp::Common v2.120
11
#     http://search.cpan.org/~abigail/Regexp-Common-2.120/lib/Regexp/Common.pm
12
#     by Damian Conway and Abigail.
13
#   - Win32::Autoglob 
14
#     http://search.cpan.org/~sburke/Win32-Autoglob-1.01/Autoglob.pm
15
#     by Sean M. Burke.
16
#   - Algorithm::Diff
17
#     http://search.cpan.org/~tyemq/Algorithm-Diff-1.1902/lib/Algorithm/Diff.pm
18
#     by Tye McQueen.
19
#
20
# This program is free software; you can redistribute it and/or modify
21
# it under the terms of the GNU General Public License as published by
22
# the Free Software Foundation; either version 2 of the License, or
23
# (at your option) any later version.
24
#
25
# This program is distributed in the hope that it will be useful,
26
# but WITHOUT ANY WARRANTY; without even the implied warranty of
27
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
28
# GNU General Public License for more details:
29
# <http://www.gnu.org/licenses/gpl.txt>.
30
#
31
# 1}}}
32
my $VERSION = "1.65";  # odd number == beta; even number == stable
33
my $URL     = "https://github.com/AlDanial/cloc";
34
require 5.006;
35
# use modules                                  {{{1
36
use warnings;
37
use strict;
38
use Getopt::Long;
39
use File::Basename;
40
use File::Temp qw { tempfile tempdir };
41
use File::Find;
42
use File::Path;
43
use File::Spec;
44
use IO::File;
45
use POSIX "strftime";
46
 
47
# Digest::MD5 isn't in the standard distribution. Use it only if installed.
48
my $HAVE_Digest_MD5 = 0;
49
eval "use Digest::MD5;";
50
if (defined $Digest::MD5::VERSION) {
51
    $HAVE_Digest_MD5 = 1;
52
} else {
53
    warn "Digest::MD5 not installed; will skip file uniqueness checks.\n";
54
}
55
 
56
# Time::HiRes became standard with Perl 5.8
57
my $HAVE_Time_HiRes = 0;
58
eval "use Time::HiRes;";
59
$HAVE_Time_HiRes = 1 if defined $Time::HiRes::VERSION;
60
 
61
my $HAVE_Rexexp_Common;
62
# Regexp::Common isn't in the standard distribution.  It will
63
# be installed in a temp directory if necessary.
64
BEGIN {
65
    if (eval "use Regexp::Common;") {
66
        $HAVE_Rexexp_Common = 1;
67
    } else {
68
        $HAVE_Rexexp_Common = 0;
69
    }
70
}
71
 
72
my $HAVE_Algorith_Diff = 0;
73
# Algorithm::Diff isn't in the standard distribution.  It will
74
# be installed in a temp directory if necessary.
75
eval "use Algorithm::Diff qw ( sdiff ) ";
76
if (defined $Algorithm::Diff::VERSION) {
77
    $HAVE_Algorith_Diff = 1;
78
} else {
79
    Install_Algorithm_Diff();
80
}
81
# print "2 HAVE_Algorith_Diff = $HAVE_Algorith_Diff\n";
82
# test_alg_diff($ARGV[$#ARGV - 1], $ARGV[$#ARGV]); die;
83
 
84
# Uncomment next two lines when building Windows executable with perl2exe
85
# or if running on a system that already has Regexp::Common.
86
#use Regexp::Common;
87
#$HAVE_Rexexp_Common = 1;
88
 
89
#perl2exe_include "Regexp/Common/whitespace.pm"
90
#perl2exe_include "Regexp/Common/URI.pm"
91
#perl2exe_include "Regexp/Common/URI/fax.pm"
92
#perl2exe_include "Regexp/Common/URI/file.pm"
93
#perl2exe_include "Regexp/Common/URI/ftp.pm"
94
#perl2exe_include "Regexp/Common/URI/gopher.pm"
95
#perl2exe_include "Regexp/Common/URI/http.pm"
96
#perl2exe_include "Regexp/Common/URI/pop.pm"
97
#perl2exe_include "Regexp/Common/URI/prospero.pm"
98
#perl2exe_include "Regexp/Common/URI/news.pm"
99
#perl2exe_include "Regexp/Common/URI/tel.pm"
100
#perl2exe_include "Regexp/Common/URI/telnet.pm"
101
#perl2exe_include "Regexp/Common/URI/tv.pm"
102
#perl2exe_include "Regexp/Common/URI/wais.pm"
103
#perl2exe_include "Regexp/Common/CC.pm"
104
#perl2exe_include "Regexp/Common/SEN.pm"
105
#perl2exe_include "Regexp/Common/number.pm"
106
#perl2exe_include "Regexp/Common/delimited.pm"
107
#perl2exe_include "Regexp/Common/profanity.pm"
108
#perl2exe_include "Regexp/Common/net.pm"
109
#perl2exe_include "Regexp/Common/zip.pm"
110
#perl2exe_include "Regexp/Common/comment.pm"
111
#perl2exe_include "Regexp/Common/balanced.pm"
112
#perl2exe_include "Regexp/Common/lingua.pm"
113
#perl2exe_include "Regexp/Common/list.pm"
114
#perl2exe_include "File/Glob.pm"
115
 
116
use Text::Tabs qw { expand };
117
use Cwd qw { cwd };
118
use File::Glob;
119
# 1}}}
120
# Usage information, options processing.       {{{1
121
my $ON_WINDOWS = 0;
122
   $ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT");
123
if ($ON_WINDOWS and $ENV{'SHELL'}) {
124
    if ($ENV{'SHELL'} =~ m{^/}) {
125
        $ON_WINDOWS = 0;  # make Cygwin look like Unix
126
    } else {
127
        $ON_WINDOWS = 1;  # MKS defines $SHELL but still acts like Windows
128
    }
129
}
130
 
131
my $NN     = chr(27) . "[0m";  # normal
132
   $NN     = "" if $ON_WINDOWS or !(-t STDERR); # -t STDERR:  is it a terminal?
133
my $BB     = chr(27) . "[1m";  # bold
134
   $BB     = "" if $ON_WINDOWS or !(-t STDERR);
135
my $script = basename $0;
136
my $usage  = "
137
Usage: $script [options] <file(s)/dir(s)> | <set 1> <set 2> | <report files>
138
 
139
 Count, or compute differences of, physical lines of source code in the
140
 given files (may be archives such as compressed tarballs or zip files)
141
 and/or recursively below the given directories.
142
 
143
 ${BB}Input Options${NN}
144
   --extract-with=<cmd>      This option is only needed if cloc is unable
145
                             to figure out how to extract the contents of
146
                             the input file(s) by itself.
147
                             Use <cmd> to extract binary archive files (e.g.:
148
                             .tar.gz, .zip, .Z).  Use the literal '>FILE<' as
149
                             a stand-in for the actual file(s) to be
150
                             extracted.  For example, to count lines of code
151
                             in the input files
152
                                gcc-4.2.tar.gz  perl-5.8.8.tar.gz
153
                             on Unix use
154
                               --extract-with='gzip -dc >FILE< | tar xf -'
155
                             or, if you have GNU tar,
156
                               --extract-with='tar zxf >FILE<'
157
                             and on Windows use, for example:
158
                               --extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\"
159
                             (if WinZip is installed there).
160
   --list-file=<file>        Take the list of file and/or directory names to
161
                             process from <file>, which has one file/directory
162
                             name per line.  Only exact matches are counted;
163
                             relative path names will be resolved starting from 
164
                             the directory where cloc is invoked.  
165
                             See also --exclude-list-file.
166
   --unicode                 Check binary files to see if they contain Unicode
167
                             expanded ASCII text.  This causes performance to
168
                             drop noticably.
169
 
170
 ${BB}Processing Options${NN}
171
   --autoconf                Count .in files (as processed by GNU autoconf) of
172
                             recognized languages.
173
   --by-file                 Report results for every source file encountered.
174
   --by-file-by-lang         Report results for every source file encountered
175
                             in addition to reporting by language.
176
   --count-and-diff <set1> <set2>    
177
                             First perform direct code counts of source file(s)
178
                             of <set1> and <set2> separately, then perform a diff 
179
                             of these.  Inputs may be pairs of files, directories, 
180
                             or archives.  See also --diff, --diff-alignment,
181
                             --diff-timeout, --ignore-case, --ignore-whitespace.
182
   --diff <set1> <set2>      Compute differences in code and comments between
183
                             source file(s) of <set1> and <set2>.  The inputs
184
                             may be pairs of files, directories, or archives.
185
                             Use --diff-alignment to generate a list showing
186
                             which file pairs where compared.  See also
187
                             --count-and-diff, --diff-alignment, --diff-timeout, 
188
                             --ignore-case, --ignore-whitespace.
189
   --diff-timeout <N>        Ignore files which take more than <N> seconds
190
                             to process.  Default is 10 seconds.
191
                             (Large files with many repeated lines can cause 
192
                             Algorithm::Diff::sdiff() to take hours.)
193
   --follow-links            [Unix only] Follow symbolic links to directories
194
                             (sym links to files are always followed).
195
   --force-lang=<lang>[,<ext>]
196
                             Process all files that have a <ext> extension
197
                             with the counter for language <lang>.  For
198
                             example, to count all .f files with the
199
                             Fortran 90 counter (which expects files to
200
                             end with .f90) instead of the default Fortran 77
201
                             counter, use
202
                               --force-lang=\"Fortran 90\",f
203
                             If <ext> is omitted, every file will be counted
204
                             with the <lang> counter.  This option can be
205
                             specified multiple times (but that is only
206
                             useful when <ext> is given each time).
207
                             See also --script-lang, --lang-no-ext.
208
   --force-lang-def=<file>   Load language processing filters from <file>,
209
                             then use these filters instead of the built-in
210
                             filters.  Note:  languages which map to the same 
211
                             file extension (for example:
212
                             MATLAB/Objective C/MUMPS/Mercury;  Pascal/PHP; 
213
                             Lisp/OpenCL; Lisp/Julia; Perl/Prolog) will be 
214
                             ignored as these require additional processing 
215
                             that is not expressed in language definition 
216
                             files.  Use --read-lang-def to define new 
217
                             language filters without replacing built-in 
218
                             filters (see also --write-lang-def).
219
   --ignore-whitespace       Ignore horizontal white space when comparing files
220
                             with --diff.  See also --ignore-case.
221
   --ignore-case             Ignore changes in case; consider upper- and lower-
222
                             case letters equivalent when comparing files with
223
                             --diff.  See also --ignore-whitespace.
224
   --lang-no-ext=<lang>      Count files without extensions using the <lang>
225
                             counter.  This option overrides internal logic
226
                             for files without extensions (where such files
227
                             are checked against known scripting languages
228
                             by examining the first line for #!).  See also
229
                             --force-lang, --script-lang.
230
   --max-file-size=<MB>      Skip files larger than <MB> megabytes when
231
                             traversing directories.  By default, <MB>=100.
232
                             cloc's memory requirement is roughly twenty times 
233
                             larger than the largest file so running with 
234
                             files larger than 100 MB on a computer with less 
235
                             than 2 GB of memory will cause problems.  
236
                             Note:  this check does not apply to files 
237
                             explicitly passed as command line arguments.
238
   --read-binary-files       Process binary files in addition to text files.
239
                             This is usually a bad idea and should only be
240
                             attempted with text files that have embedded
241
                             binary data.
242
   --read-lang-def=<file>    Load new language processing filters from <file>
243
                             and merge them with those already known to cloc.  
244
                             If <file> defines a language cloc already knows 
245
                             about, cloc's definition will take precedence.  
246
                             Use --force-lang-def to over-ride cloc's 
247
                             definitions (see also --write-lang-def ).
248
   --script-lang=<lang>,<s>  Process all files that invoke <s> as a #!
249
                             scripting language with the counter for language
250
                             <lang>.  For example, files that begin with
251
                                #!/usr/local/bin/perl5.8.8
252
                             will be counted with the Perl counter by using
253
                                --script-lang=Perl,perl5.8.8
254
                             The language name is case insensitive but the
255
                             name of the script language executable, <s>,
256
                             must have the right case.  This option can be
257
                             specified multiple times.  See also --force-lang,
258
                             --lang-no-ext.
259
   --sdir=<dir>              Use <dir> as the scratch directory instead of
260
                             letting File::Temp chose the location.  Files
261
                             written to this location are not removed at
262
                             the end of the run (as they are with File::Temp).
263
   --skip-uniqueness         Skip the file uniqueness check.  This will give
264
                             a performance boost at the expense of counting
265
                             files with identical contents multiple times
266
                             (if such duplicates exist).
267
   --stdin-name=<file>       Give a file name to use to determine the language
268
                             for standard input.
269
   --strip-comments=<ext>    For each file processed, write to the current
270
                             directory a version of the file which has blank
271
                             lines and comments removed.  The name of each
272
                             stripped file is the original file name with
273
                             .<ext> appended to it.  It is written to the
274
                             current directory unless --original-dir is on.
275
   --original-dir            [Only effective in combination with
276
                             --strip-comments]  Write the stripped files
277
                             to the same directory as the original files.
278
   --sum-reports             Input arguments are report files previously
279
                             created with the --report-file option.  Makes
280
                             a cumulative set of results containing the
281
                             sum of data from the individual report files.
282
   --unix                    Override the operating system autodetection
283
                             logic and run in UNIX mode.  See also
284
                             --windows, --show-os.
285
   --windows                 Override the operating system autodetection
286
                             logic and run in Microsoft Windows mode.
287
                             See also --unix, --show-os.
288
 
289
 ${BB}Filter Options${NN}
290
   --exclude-dir=<D1>[,D2,]  Exclude the given comma separated directories
291
                             D1, D2, D3, et cetera, from being scanned.  For
292
                             example  --exclude-dir=.cache,test  will skip
293
                             all files that have /.cache/ or /test/ as part
294
                             of their path.
295
                             Directories named .bzr, .cvs, .hg, .git, and
296
                             .svn are always excluded.
297
   --exclude-ext=<ext1>[,<ext2>[...]]
298
                             Do not count files having the given file name
299
                             extensions.
300
   --exclude-lang=<L1>[,L2,] Exclude the given comma separated languages
301
                             L1, L2, L3, et cetera, from being counted.
302
   --exclude-list-file=<file>  Ignore files and/or directories whose names
303
                             appear in <file>.  <file> should have one file
304
                             name per line.  Only exact matches are ignored;
305
                             relative path names will be resolved starting from 
306
                             the directory where cloc is invoked.  
307
                             See also --list-file.
308
   --include-lang=<L1>[,L2,] Count only the given comma separated languages
309
                             L1, L2, L3, et cetera.
310
   --match-d=<regex>         Only count files in directories matching the Perl
311
                             regex.  For example
312
                               --match-d='/(src|include)/'
313
                             only counts files in directories containing
314
                             /src/ or /include/.
315
   --not-match-d=<regex>     Count all files except those in directories
316
                             matching the Perl regex.
317
   --match-f=<regex>         Only count files whose basenames match the Perl
318
                             regex.  For example
319
                               --match-f='^[Ww]idget'
320
                             only counts files that start with Widget or widget.
321
   --not-match-f=<regex>     Count all files except those whose basenames
322
                             match the Perl regex.
323
   --skip-archive=<regex>    Ignore files that end with the given Perl regular
324
                             expression.  For example, if given
325
                               --skip-archive='(zip|tar(\.(gz|Z|bz2|xz|7z))?)'
326
                             the code will skip files that end with .zip,
327
                             .tar, .tar.gz, .tar.Z, .tar.bz2, .tar.xz, and
328
                             .tar.7z.
329
   --skip-win-hidden         On Windows, ignore hidden files.
330
 
331
 ${BB}Debug Options${NN}
332
   --categorized=<file>      Save names of categorized files to <file>.
333
   --counted=<file>          Save names of processed source files to <file>.
334
   --explain=<lang>          Print the filters used to remove comments for
335
                             language <lang> and exit.  In some cases the 
336
                             filters refer to Perl subroutines rather than
337
                             regular expressions.  An examination of the
338
                             source code may be needed for further explanation.
339
   --diff-alignment=<file>   Write to <file> a list of files and file pairs
340
                             showing which files were added, removed, and/or
341
                             compared during a run with --diff.  This switch
342
                             forces the --diff mode on.
343
   --help                    Print this usage information and exit.
344
   --found=<file>            Save names of every file found to <file>.
345
   --ignored=<file>          Save names of ignored files and the reason they
346
                             were ignored to <file>.
347
   --print-filter-stages     Print processed source code before and after 
348
                             each filter is applied.
349
   --show-ext[=<ext>]        Print information about all known (or just the
350
                             given) file extensions and exit.
351
   --show-lang[=<lang>]      Print information about all known (or just the
352
                             given) languages and exit.
353
   --show-os                 Print the value of the operating system mode
354
                             and exit.  See also --unix, --windows.
355
   -v[=<n>]                  Verbose switch (optional numeric value).
356
   --version                 Print the version of this program and exit.
357
   --write-lang-def=<file>   Writes to <file> the language processing filters
358
                             then exits.  Useful as a first step to creating
359
                             custom language definitions (see also
360
                             --force-lang-def, --read-lang-def).
361
 
362
 ${BB}Output Options${NN}
363
   --3                       Print third-generation language output.
364
                             (This option can cause report summation to fail
365
                             if some reports were produced with this option
366
                             while others were produced without it.)
367
   --by-percent  X           Instead of comment and blank line counts, show 
368
                             these values as percentages based on the value 
369
                             of X in the denominator:
370
                                X = 'c'   -> # lines of code
371
                                X = 'cm'  -> # lines of code + comments
372
                                X = 'cb'  -> # lines of code + blanks
373
                                X = 'cmb' -> # lines of code + comments + blanks
374
                             For example, if using method 'c' and your code
375
                             has twice as many lines of comments as lines 
376
                             of code, the value in the comment column will 
377
                             be 200%.  The code column remains a line count.
378
   --csv                     Write the results as comma separated values.
379
   --csv-delimiter=<C>       Use the character <C> as the delimiter for comma
380
                             separated files instead of ,.  This switch forces
381
   --out=<file>              Synonym for --report-file=<file>.
382
                             --csv to be on.
383
   --progress-rate=<n>       Show progress update after every <n> files are
384
                             processed (default <n>=100).  Set <n> to 0 to
385
                             suppress progress output (useful when redirecting
386
                             output to STDOUT).
387
   --quiet                   Suppress all information messages except for
388
                             the final report.
389
   --report-file=<file>      Write the results to <file> instead of STDOUT.
390
   --sql=<file>              Write results as SQL create and insert statements
391
                             which can be read by a database program such as
392
                             SQLite.  If <file> is -, output is sent to STDOUT.
393
   --sql-append              Append SQL insert statements to the file specified
394
                             by --sql and do not generate table creation
395
                             statements.  Only valid with the --sql option.
396
   --sql-project=<name>      Use <name> as the project identifier for the
397
                             current run.  Only valid with the --sql option.
398
   --sql-style=<style>       Write SQL statements in the given style instead
399
                             of the default SQLite format.  Currently, the 
400
                             only style option is Oracle.
401
   --sum-one                 For plain text reports, show the SUM: output line
402
                             even if only one input file is processed.
403
   --xml                     Write the results in XML.
404
   --xsl=<file>              Reference <file> as an XSL stylesheet within
405
                             the XML output.  If <file> is 1 (numeric one),
406
                             writes a default stylesheet, cloc.xsl (or
407
                             cloc-diff.xsl if --diff is also given).
408
                             This switch forces --xml on.
409
   --yaml                    Write the results in YAML.
410
 
411
";
412
#  Help information for options not yet implemented:
413
#  --inline                  Process comments that appear at the end
414
#                            of lines containing code.
415
#  --html                    Create HTML files of each input file showing
416
#                            comment and code lines in different colors.
417
 
418
$| = 1;  # flush STDOUT
419
my $start_time = get_time();
420
my (
421
    $opt_categorized          ,
422
    $opt_found                ,
423
    @opt_force_lang           ,
424
    $opt_lang_no_ext          ,
425
    @opt_script_lang          ,
426
    $opt_count_diff           ,
427
    $opt_diff                 ,
428
    $opt_diff_alignment       ,
429
    $opt_diff_timeout         ,
430
    $opt_html                 ,
431
    $opt_ignored              ,
432
    $opt_counted              ,
433
    $opt_show_ext             ,
434
    $opt_show_lang            ,
435
    $opt_progress_rate        ,
436
    $opt_print_filter_stages  ,
437
    $opt_v                    ,
438
    $opt_version              ,
439
    $opt_exclude_lang         ,
440
    $opt_exclude_list_file    ,
441
    $opt_exclude_dir          ,
442
    $opt_explain              ,
443
    $opt_include_lang         ,
444
    $opt_force_lang_def       ,
445
    $opt_read_lang_def        ,
446
    $opt_write_lang_def       ,
447
    $opt_strip_comments       ,
448
    $opt_original_dir         ,
449
    $opt_quiet                ,
450
    $opt_report_file          ,
451
    $opt_sdir                 ,
452
    $opt_sum_reports          ,
453
    $opt_unicode              ,
454
    $opt_no3                  ,   # accept it but don't use it
455
    $opt_3                    ,
456
    $opt_extract_with         ,
457
    $opt_by_file              ,
458
    $opt_by_file_by_lang      ,
459
    $opt_by_percent           ,
460
    $opt_xml                  ,
461
    $opt_xsl                  ,
462
    $opt_yaml                 ,
463
    $opt_csv                  ,
464
    $opt_csv_delimiter        ,
465
    $opt_match_f              ,
466
    $opt_not_match_f          ,
467
    $opt_match_d              ,
468
    $opt_not_match_d          ,
469
    $opt_skip_uniqueness      ,
470
    $opt_list_file            ,
471
    $opt_help                 ,
472
    $opt_skip_win_hidden      ,
473
    $opt_read_binary_files    ,
474
    $opt_sql                  ,
475
    $opt_sql_append           ,
476
    $opt_sql_project          ,
477
    $opt_sql_style            ,
478
    $opt_inline               ,
479
    $opt_exclude_ext          ,
480
    $opt_ignore_whitespace    ,
481
    $opt_ignore_case          ,
482
    $opt_follow_links         ,
483
    $opt_autoconf             ,
484
    $opt_sum_one              ,
485
    $opt_stdin_name           ,
486
    $opt_force_on_windows     ,
487
    $opt_force_on_unix        ,   # actually forces !$ON_WINDOWS
488
    $opt_show_os              ,
489
    $opt_skip_archive         ,
490
    $opt_max_file_size        ,   # in MB
491
   );
492
my $getopt_success = GetOptions(
493
   "by_file|by-file"                         => \$opt_by_file             ,
494
   "by_file_by_lang|by-file-by-lang"         => \$opt_by_file_by_lang     ,
495
   "categorized=s"                           => \$opt_categorized         ,
496
   "counted=s"                               => \$opt_counted             ,
497
   "include_lang|include-lang=s"             => \$opt_include_lang        ,
498
   "exclude_lang|exclude-lang=s"             => \$opt_exclude_lang        ,
499
   "exclude_dir|exclude-dir=s"               => \$opt_exclude_dir         ,
500
   "exclude_list_file|exclude-list-file=s"   => \$opt_exclude_list_file   ,
501
   "explain=s"                               => \$opt_explain             ,
502
   "extract_with|extract-with=s"             => \$opt_extract_with        ,
503
   "found=s"                                 => \$opt_found               ,
504
   "count_and_diff|count-and-diff"           => \$opt_count_diff          ,
505
   "diff"                                    => \$opt_diff                ,
506
   "diff-alignment|diff_alignment=s"         => \$opt_diff_alignment      ,
507
   "diff-timeout|diff_timeout=i"             => \$opt_diff_timeout        ,
508
   "html"                                    => \$opt_html                ,
509
   "ignored=s"                               => \$opt_ignored             ,
510
   "quiet"                                   => \$opt_quiet               ,
511
   "force_lang_def|force-lang-def=s"         => \$opt_force_lang_def      ,
512
   "read_lang_def|read-lang-def=s"           => \$opt_read_lang_def       ,
513
   "show_ext|show-ext:s"                     => \$opt_show_ext            ,
514
   "show_lang|show-lang:s"                   => \$opt_show_lang           ,
515
   "progress_rate|progress-rate=i"           => \$opt_progress_rate       ,
516
   "print_filter_stages|print-filter-stages" => \$opt_print_filter_stages ,
517
   "report_file|report-file=s"               => \$opt_report_file         ,
518
   "out=s"                                   => \$opt_report_file         ,
519
   "script_lang|script-lang=s"               => \@opt_script_lang         ,
520
   "sdir=s"                                  => \$opt_sdir                ,
521
   "skip_uniqueness|skip-uniqueness"         => \$opt_skip_uniqueness     ,
522
   "strip_comments|strip-comments=s"         => \$opt_strip_comments      ,
523
   "original_dir|original-dir"               => \$opt_original_dir        ,
524
   "sum_reports|sum-reports"                 => \$opt_sum_reports         ,
525
   "unicode"                                 => \$opt_unicode             ,
526
   "no3"                                     => \$opt_no3                 ,  # ignored
527
   "3"                                       => \$opt_3                   ,
528
   "v:i"                                     => \$opt_v                   ,
529
   "version"                                 => \$opt_version             ,
530
   "write_lang_def|write-lang-def=s"         => \$opt_write_lang_def      ,
531
   "xml"                                     => \$opt_xml                 ,
532
   "xsl=s"                                   => \$opt_xsl                 ,
533
   "force_lang|force-lang=s"                 => \@opt_force_lang          ,
534
   "lang_no_ext|lang-no-ext=s"               => \$opt_lang_no_ext         ,
535
   "yaml"                                    => \$opt_yaml                ,
536
   "csv"                                     => \$opt_csv                 ,
537
   "csv_delimeter|csv-delimiter=s"           => \$opt_csv_delimiter       ,
538
   "match_f|match-f=s"                       => \$opt_match_f             ,
539
   "not_match_f|not-match-f=s"               => \$opt_not_match_f         ,
540
   "match_d|match-d=s"                       => \$opt_match_d             ,
541
   "not_match_d|not-match-d=s"               => \$opt_not_match_d         ,
542
   "list_file|list-file=s"                   => \$opt_list_file           ,
543
   "help"                                    => \$opt_help                ,
544
   "skip_win_hidden|skip-win-hidden"         => \$opt_skip_win_hidden     ,
545
   "read_binary_files|read-binary-files"     => \$opt_read_binary_files   ,
546
   "sql=s"                                   => \$opt_sql                 ,
547
   "sql_project|sql-project=s"               => \$opt_sql_project         ,
548
   "sql_append|sql-append"                   => \$opt_sql_append          ,
549
   "sql_style|sql-style=s"                   => \$opt_sql_style           ,
550
   "inline"                                  => \$opt_inline              ,
551
   "exclude_ext|exclude-ext=s"               => \$opt_exclude_ext         ,
552
   "ignore_whitespace|ignore-whitespace"     => \$opt_ignore_whitespace   ,
553
   "ignore_case|ignore-case"                 => \$opt_ignore_case         ,
554
   "follow_links|follow-links"               => \$opt_follow_links        ,
555
   "autoconf"                                => \$opt_autoconf            ,
556
   "sum_one|sum-one"                         => \$opt_sum_one             ,
557
   "by_percent|by-percent=s"                 => \$opt_by_percent          ,
558
   "stdin_name|stdin-name=s"                 => \$opt_stdin_name          ,
559
   "windows"                                 => \$opt_force_on_windows    ,
560
   "unix"                                    => \$opt_force_on_unix       ,
561
   "show_os|show-os"                         => \$opt_show_os             ,
562
   "skip_archive|skip-archive=s"             => \$opt_skip_archive        ,
563
   "max_file_size|max-file-size=i"           => \$opt_max_file_size       ,
564
  );
565
$opt_by_file  = 1 if defined  $opt_by_file_by_lang;
566
my $CLOC_XSL = "cloc.xsl"; # created with --xsl
567
   $CLOC_XSL = "cloc-diff.xsl" if $opt_diff;
568
die "\n" unless $getopt_success;
569
die $usage if $opt_help;
570
my %Exclude_Language = ();
571
   %Exclude_Language = map { $_ => 1 } split(/,/, $opt_exclude_lang) 
572
        if $opt_exclude_lang;
573
my %Exclude_Dir      = ();
574
   %Exclude_Dir      = map { $_ => 1 } split(/,/, $opt_exclude_dir ) 
575
        if $opt_exclude_dir ;
576
my %Include_Language = ();
577
   %Include_Language = map { $_ => 1 } split(/,/, $opt_include_lang) 
578
        if $opt_include_lang;
579
# Forcibly exclude .svn, .cvs, .hg, .git, .bzr directories.  The contents of these
580
# directories often conflict with files of interest.
581
$opt_exclude_dir       = 1;
582
$Exclude_Dir{".svn"}   = 1;
583
$Exclude_Dir{".cvs"}   = 1;
584
$Exclude_Dir{".hg"}    = 1;
585
$Exclude_Dir{".git"}   = 1;
586
$Exclude_Dir{".bzr"}   = 1;
587
$opt_count_diff        = defined $opt_count_diff ? 1 : 0;
588
$opt_diff              = 1  if $opt_diff_alignment;
589
$opt_exclude_ext       = "" unless $opt_exclude_ext;
590
$opt_ignore_whitespace = 0  unless $opt_ignore_whitespace;
591
$opt_ignore_case       = 0  unless $opt_ignore_case;
592
$opt_lang_no_ext       = 0  unless $opt_lang_no_ext;
593
$opt_follow_links      = 0  unless $opt_follow_links;
594
$opt_diff_timeout      =10  unless $opt_diff_timeout;
595
$opt_csv               = 1  if $opt_csv_delimiter;
596
$ON_WINDOWS            = 1  if $opt_force_on_windows;
597
$ON_WINDOWS            = 0  if $opt_force_on_unix;
598
$opt_max_file_size     = 100 unless $opt_max_file_size;
599
 
600
my @COUNT_DIFF_ARGV        = undef;
601
my $COUNT_DIFF_report_file = undef;
602
if ($opt_count_diff) {
603
    die "--count-and-diff requires two arguments; got ", scalar @ARGV, "\n" 
604
        if scalar @ARGV != 2;
605
    # prefix with a dummy term so that $opt_count_diff is the
606
    # index into @COUNT_DIFF_ARGV to work on at each pass
607
    @COUNT_DIFF_ARGV = (undef, $ARGV[0],
608
                               $ARGV[1],
609
                              [$ARGV[0], $ARGV[1]]);  # 3rd pass: diff them
610
    $COUNT_DIFF_report_file = $opt_report_file if $opt_report_file;
611
}
612
 
613
# Options defaults:
614
$opt_progress_rate = 100 unless defined $opt_progress_rate;
615
$opt_progress_rate =   0 if     defined $opt_quiet;
616
if (!defined $opt_v) {
617
    $opt_v  = 0;
618
} elsif (!$opt_v) {
619
    $opt_v  = 1;
620
}
621
if (defined $opt_xsl) {
622
    $opt_xsl = $CLOC_XSL if $opt_xsl eq "1";
623
    $opt_xml = 1;
624
}
625
my $skip_generate_report = 0;
626
$opt_sql_style = 0 unless defined $opt_sql_style;
627
$opt_sql = 0 unless $opt_sql_style or defined $opt_sql;
628
if ($opt_sql eq "-" || $opt_sql eq "1") { # stream SQL output to STDOUT
629
    $opt_quiet            = 1;
630
    $skip_generate_report = 1;
631
    $opt_by_file          = 1;
632
    $opt_sum_reports      = 0;
633
    $opt_progress_rate    = 0;
634
} elsif ($opt_sql)  { # write SQL output to a file
635
    $opt_by_file          = 1;
636
    $skip_generate_report = 1;
637
    $opt_sum_reports      = 0;
638
}
639
if ($opt_sql_style) {
640
    $opt_sql_style = lc $opt_sql_style;
641
    if (!grep { lc $_ eq $opt_sql_style } qw ( Oracle )) {
642
        die "'$opt_sql_style' is not a recognized SQL style.\n";
643
    }
644
}
645
$opt_by_percent = '' unless defined $opt_by_percent;
646
if ($opt_by_percent and $opt_by_percent !~ m/^(c|cm|cb|cmb)$/i) {
647
    die "--by-percent must be either 'c', 'cm', 'cb', or 'cmb'\n";
648
}
649
$opt_by_percent = lc $opt_by_percent;
650
 
651
die $usage unless defined $opt_version         or
652
                  defined $opt_show_lang       or
653
                  defined $opt_show_ext        or
654
                  defined $opt_show_os         or
655
                  defined $opt_write_lang_def  or
656
                  defined $opt_list_file       or
657
                  defined $opt_xsl             or
658
                  defined $opt_explain         or
659
                  scalar @ARGV >= 1;
660
die "--diff requires two arguments; got ", scalar @ARGV, "\n" 
661
    if $opt_diff and scalar @ARGV != 2;
662
if ($opt_version) {
663
    printf "$VERSION\n";
664
    exit;
665
}
666
# 1}}}
667
# Step 1:  Initialize global constants.        {{{1
668
#
669
my $nFiles_Found = 0;  # updated in make_file_list
670
my (%Language_by_Extension, %Language_by_Script,
671
    %Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename,
672
    %Language_by_File, %Scale_Factor, %Known_Binary_Archives,
673
    %EOL_Continuation_re,
674
   );
675
my $ALREADY_SHOWED_HEADER = 0;
676
my $ALREADY_SHOWED_XML_SECTION = 0;
677
my %Error_Codes = ( 'Unable to read'                => -1,
678
                    'Neither file nor directory'    => -2, 
679
                    'Diff error (quoted comments?)' => -3, 
680
                    'Diff error, exceeded timeout'  => -4, 
681
                  );
682
if ($opt_force_lang_def) {
683
    # replace cloc's definitions
684
    read_lang_def(
685
        $opt_force_lang_def    , #        Sample values:
686
        \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77' 
687
        \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
688
        \%Language_by_File     , # Language_by_File{makefile}  = 'make'
689
        \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] = 
690
                                 #      [ 'remove_matches' , '^\s*#'  ]
691
        \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
692
        \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
693
        \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
694
        \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
695
        );
696
} else {
697
    set_constants(               #
698
        \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77' 
699
        \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
700
        \%Language_by_File     , # Language_by_File{makefile}  = 'make'
701
        \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] = 
702
                                 #      [ 'remove_matches' , '^\s*#'  ]
703
        \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
704
        \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
705
        \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
706
        \%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1
707
        \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
708
        );
709
}
710
if ($opt_read_lang_def) {
711
    # augment cloc's definitions (keep cloc's where there are overlaps)
712
    merge_lang_def(
713
        $opt_read_lang_def     , #        Sample values:
714
        \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77' 
715
        \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
716
        \%Language_by_File     , # Language_by_File{makefile}  = 'make'
717
        \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] = 
718
                                 #      [ 'remove_matches' , '^\s*#'  ]
719
        \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
720
        \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
721
        \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
722
        \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
723
        );
724
}
725
if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) {
726
    die_unknown_lang($opt_lang_no_ext, "--lang-no-ext")
727
}
728
check_scale_existence(\%Filters_by_Language, \%Language_by_Extension,
729
                      \%Scale_Factor);
730
 
731
# Process command line provided extension-to-language mapping overrides.
732
# Make a hash of known languages in lower case for easier matching.
733
my %Recognized_Language_lc = (); # key = language name in lc, value = true name
734
foreach my $language (keys %Filters_by_Language) {
735
    my $lang_lc = lc $language;
736
    $Recognized_Language_lc{$lang_lc} = $language;
737
}
738
my %Forced_Extension = (); # file name extensions which user wants to count
739
my $All_One_Language = 0;  # set to !0 if --force-lang's <ext> is missing
740
foreach my $pair (@opt_force_lang) {
741
    my ($lang, $extension) = split(',', $pair);
742
    my $lang_lc = lc $lang;
743
    if (defined $extension) {
744
        $Forced_Extension{$extension} = $lang;
745
 
746
        die_unknown_lang($lang, "--force-lang")
747
            unless $Recognized_Language_lc{$lang_lc}; 
748
 
749
        $Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc};
750
    } else {
751
        # the scary case--count everything as this language
752
        $All_One_Language = $Recognized_Language_lc{$lang_lc};
753
    }
754
}
755
 
756
foreach my $pair (@opt_script_lang) {
757
    my ($lang, $script_name) = split(',', $pair);
758
    my $lang_lc = lc $lang;
759
    if (!defined $script_name) {
760
        die "The --script-lang option requires a comma separated pair of ".
761
            "strings.\n";
762
    }
763
 
764
    die_unknown_lang($lang, "--script-lang")
765
        unless $Recognized_Language_lc{$lang_lc}; 
766
 
767
    $Language_by_Script{$script_name} = $Recognized_Language_lc{$lang_lc};
768
}
769
 
770
# If user provided file extensions to ignore, add these to
771
# the exclusion list.
772
foreach my $ext (map { $_ => 1 } split(/,/, $opt_exclude_ext ) ) {
773
    $ext = lc $ext if $ON_WINDOWS;
774
    $Not_Code_Extension{$ext} = 1;
775
}
776
 
777
# If SQL or --by-file output is requested, keep track of directory names
778
# generated by File::Temp::tempdir and used to temporarily hold the results
779
# of compressed archives.  Contents of the SQL table 't' will be much
780
# cleaner if these meaningless directory names are stripped from the front
781
# of files pulled from the archives.
782
my %TEMP_DIR = ();
783
my $TEMP_OFF =  0;  # Needed for --sdir; keep track of the number of
784
                    # scratch directories made in this run to avoid
785
                    # file overwrites by multiple extractions to same
786
                    # sdir.
787
# Also track locations where temporary installations, if necessary, of
788
# Algorithm::Diff and/or Regexp::Common are done.  Make sure these 
789
# directories are not counted as inputs (ref bug #80 2012-11-23).
790
my %TEMP_INST = ();
791
 
792
# invert %Language_by_Script hash to get an easy-to-look-up list of known
793
# scripting languages
794
my %Script_Language = map { $_ => 1 } values %Language_by_Script ;
795
# 1}}}
796
# Step 2:  Early exits for display, summation. {{{1
797
#
798
print_extension_info(   $opt_show_ext     ) if defined $opt_show_ext ;
799
print_language_info(    $opt_show_lang, '') if defined $opt_show_lang;
800
print_language_filters( $opt_explain      ) if defined $opt_explain  ;
801
exit if (defined $opt_show_ext)  or 
802
        (defined $opt_show_lang) or
803
        (defined $opt_explain);
804
 
805
Top_of_Processing_Loop:
806
# Sorry, coding purists.  Using a goto to implement --count-and-diff
807
# which has to do three passes over the main code, starting with
808
# a clean slate each time.
809
if ($opt_count_diff) {
810
    @ARGV = ( $COUNT_DIFF_ARGV[ $opt_count_diff ] );
811
    if ($opt_count_diff == 3) {
812
        $opt_diff = 1;
813
        @ARGV = @{$COUNT_DIFF_ARGV[ $opt_count_diff ]}; # last arg is list of list
814
    }
815
    if ($opt_report_file) { 
816
        # Instead of just one output file, will have three.
817
        # Keep their names unique otherwise results are clobbered.
818
        if ($opt_count_diff == 3) {
819
            $opt_report_file = $COUNT_DIFF_report_file . ".diff.$ARGV[0].$ARGV[1]";
820
        } else {
821
            $opt_report_file = $COUNT_DIFF_report_file . "." .  $ARGV[0];
822
        }
823
    } else {
824
        # STDOUT; print a header showing what it's working on
825
        if ($opt_count_diff == 3) {
826
            print "\ndiff $ARGV[0] $ARGV[1]::\n";
827
        } else {
828
            print "\n" if $opt_count_diff > 1;
829
            print "$ARGV[0]::\n";
830
        }
831
    }
832
    $ALREADY_SHOWED_HEADER      = 0;
833
    $ALREADY_SHOWED_XML_SECTION = 0;
834
}
835
 
836
#print "Before glob have [", join(",", @ARGV), "]\n";
837
@ARGV = windows_glob(@ARGV) if $ON_WINDOWS;
838
#print "after  glob have [", join(",", @ARGV), "]\n";
839
 
840
# filter out archive files if requested to do so
841
if (defined $opt_skip_archive) {
842
    my @non_archive = ();
843
    foreach my $candidate (@ARGV) {
844
        if ($candidate !~ m/${opt_skip_archive}$/) {
845
            push @non_archive, $candidate;
846
 
847
        } 
848
    }
849
    @ARGV = @non_archive;
850
}
851
 
852
if ($opt_sum_reports and $opt_diff) {
853
    my @results = ();
854
    if ($opt_list_file) { # read inputs from the list file
855
        my @list = read_list_file($opt_list_file);
856
        @results = combine_diffs(\@list);
857
    } else { # get inputs from the command line
858
        @results = combine_diffs(\@ARGV);
859
    }
860
    if ($opt_report_file) {
861
        write_file($opt_report_file, @results);
862
    } else {
863
        print "\n", join("\n", @results), "\n";
864
    }
865
    exit;
866
}
867
if ($opt_sum_reports) {
868
    my %Results = ();
869
    foreach my $type( "by language", "by report file" ) {
870
        my $found_lang = undef; 
871
        if ($opt_list_file) { # read inputs from the list file
872
            my @list = read_list_file($opt_list_file);
873
            $found_lang = combine_results(\@list, 
874
                                           $type, 
875
                                          \%{$Results{ $type }}, 
876
                                          \%Filters_by_Language );
877
        } else { # get inputs from the command line
878
            $found_lang = combine_results(\@ARGV, 
879
                                           $type, 
880
                                          \%{$Results{ $type }}, 
881
                                          \%Filters_by_Language );
882
        }
883
        next unless %Results;
884
        my $end_time = get_time();
885
        my @results  = generate_report($VERSION, $end_time - $start_time,
886
                                       $type,
887
                                      \%{$Results{ $type }}, \%Scale_Factor);
888
        if ($opt_report_file) {
889
            my $ext  = ".lang";
890
               $ext  = ".file" unless $type eq "by language";
891
            next if !$found_lang and  $ext  eq ".lang";
892
            write_file($opt_report_file . $ext, @results);
893
        } else {
894
            print "\n", join("\n", @results), "\n";
895
        }
896
    }
897
    exit;
898
}
899
if ($opt_write_lang_def) {
900
    write_lang_def($opt_write_lang_def   ,
901
                  \%Language_by_Extension,
902
                  \%Language_by_Script   ,
903
                  \%Language_by_File     ,
904
                  \%Filters_by_Language  ,
905
                  \%Not_Code_Extension   ,
906
                  \%Not_Code_Filename    ,
907
                  \%Scale_Factor         ,
908
                  \%EOL_Continuation_re  ,
909
                  );
910
    exit;
911
}
912
if ($opt_show_os) {
913
    if ($ON_WINDOWS) {
914
        print "Windows\n";
915
    } else {
916
        print "UNIX\n";
917
    }
918
    exit;
919
}
920
# 1}}}
921
# Step 3:  Create a list of files to consider. {{{1
922
#  a) If inputs are binary archives, first cd to a temp
923
#     directory, expand the archive with the user-given
924
#     extraction tool, then add the temp directory to
925
#     the list of dirs to process.
926
#  b) Create a list of every file that might contain source
927
#     code.  Ignore binary files, zero-sized files, and
928
#     any file in a directory the user says to exclude.
929
#  c) Determine the language for each file in the list.
930
#
931
my @binary_archive = ();
932
my $cwd            = cwd();
933
if ($opt_extract_with) {
934
#print "cwd main = [$cwd]\n";
935
    my @extract_location = ();
936
    foreach my $bin_file (@ARGV) {
937
        my $extract_dir = undef;
938
        if ($opt_sdir) {
939
            ++$TEMP_OFF;
940
            $extract_dir = "$opt_sdir/$TEMP_OFF";
941
            File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
942
            File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
943
        } else {
944
            $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
945
        }
946
        $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
947
        print "mkdir $extract_dir\n"  if $opt_v;
948
        print "cd    $extract_dir\n"  if $opt_v;
949
        chdir $extract_dir;
950
        my $bin_file_full_path = "";
951
        if (File::Spec->file_name_is_absolute( $bin_file )) {
952
            $bin_file_full_path = $bin_file;
953
#print "bin_file_full_path (was ful) = [$bin_file_full_path]\n";
954
        } else {
955
            $bin_file_full_path = File::Spec->catfile( $cwd, $bin_file );
956
#print "bin_file_full_path (was rel) = [$bin_file_full_path]\n";
957
        }
958
        my     $extract_cmd = uncompress_archive_cmd($bin_file_full_path);
959
        print  $extract_cmd, "\n" if $opt_v;
960
        system $extract_cmd;
961
        push @extract_location, $extract_dir;
962
        chdir $cwd;
963
    }
964
    # It is possible that the binary archive itself contains additional
965
    # files compressed the same way (true for Java .ear files).  Go
966
    # through all the files that were extracted, see if they are binary
967
    # archives and try to extract them.  Lather, rinse, repeat.
968
    my $binary_archives_exist = 1;
969
    my $count_binary_archives = 0;
970
    my $previous_count        = 0;
971
    my $n_pass                = 0;
972
    while ($binary_archives_exist) {
973
        @binary_archive = ();
974
        foreach my $dir (@extract_location) {
975
            find(\&archive_files, $dir);  # populates global @binary_archive
976
        }
977
        foreach my $archive (@binary_archive) {
978
            my $extract_dir = undef;
979
            if ($opt_sdir) {
980
                ++$TEMP_OFF;
981
                $extract_dir = "$opt_sdir/$TEMP_OFF";
982
                File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
983
                File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
984
            } else {
985
                $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
986
            }
987
            $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
988
            print "mkdir $extract_dir\n"  if $opt_v;
989
            print "cd    $extract_dir\n"  if $opt_v;
990
            chdir  $extract_dir;
991
 
992
            my     $extract_cmd = uncompress_archive_cmd($archive);
993
            print  $extract_cmd, "\n" if $opt_v;
994
            system $extract_cmd;
995
            push @extract_location, $extract_dir;
996
            unlink $archive;  # otherwise will be extracting it forever 
997
        }
998
        $count_binary_archives = scalar @binary_archive;
999
        if ($count_binary_archives == $previous_count) {
1000
            $binary_archives_exist = 0;
1001
        }
1002
        $previous_count = $count_binary_archives;
1003
    }
1004
    chdir $cwd;
1005
 
1006
    @ARGV = @extract_location;
1007
} else {
1008
    # see if any of the inputs need to be auto-uncompressed &/or expanded
1009
    my @updated_ARGS = ();
1010
    foreach my $Arg (@ARGV) {
1011
        if (is_dir($Arg)) {
1012
            push @updated_ARGS, $Arg;
1013
            next;
1014
        }
1015
        my $full_path = "";
1016
        if (File::Spec->file_name_is_absolute( $Arg )) {
1017
            $full_path = $Arg;
1018
        } else {
1019
            $full_path = File::Spec->catfile( $cwd, $Arg );
1020
        }
1021
#print "full_path = [$full_path]\n";
1022
        my $extract_cmd = uncompress_archive_cmd($full_path);
1023
        if ($extract_cmd) {
1024
            my $extract_dir = undef;
1025
            if ($opt_sdir) {
1026
                ++$TEMP_OFF;
1027
                $extract_dir = "$opt_sdir/$TEMP_OFF";
1028
                File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
1029
                File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
1030
            } else {
1031
                $extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
1032
            }
1033
            $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
1034
            print "mkdir $extract_dir\n"  if $opt_v;
1035
            print "cd    $extract_dir\n"  if $opt_v;
1036
            chdir  $extract_dir;
1037
            print  $extract_cmd, "\n" if $opt_v;
1038
            system $extract_cmd;
1039
            push @updated_ARGS, $extract_dir;
1040
            chdir $cwd;
1041
        } else {
1042
            # this is a conventional, uncompressed, unarchived file
1043
            # or a directory; keep as-is
1044
            push @updated_ARGS, $Arg;
1045
        }
1046
    }
1047
    @ARGV = @updated_ARGS;
1048
 
1049
    # make sure we're not counting any directory containing
1050
    # temporary installations of Regexp::Common, Algorithm::Diff
1051
    foreach my $d (sort keys %TEMP_INST) {
1052
        foreach my $a (@ARGV) {
1053
            next unless is_dir($a);
1054
            if ($opt_v > 2) {
1055
                printf "Comparing %s (location of %s) to input [%s]\n",
1056
                        $d, $TEMP_INST{$d}, $a;
1057
            }
1058
            if ($a eq $d) {
1059
                die "File::Temp::tempdir chose directory ",
1060
                    $d, " to install ", $TEMP_INST{$d}, " but this ",
1061
                    "matches one of your input directories.  Rerun ",
1062
                    "with --sdir and supply a different temporary ",
1063
                    "directory for ", $TEMP_INST{$d}, "\n";
1064
            }
1065
        }
1066
    }
1067
}
1068
# 1}}}
1069
my @Errors    = ();
1070
my @file_list = ();  # global variable updated in files()
1071
my %Ignored   = ();  # files that are not counted (language not recognized or
1072
                     # problems reading the file)
1073
my @Lines_Out = ();
1074
if ($opt_diff) {
1075
# Step 4:  Separate code from non-code files.  {{{1
1076
my @fh            = ();
1077
my @files_for_set = ();
1078
# make file lists for each separate argument
1079
for (my $i = 0; $i < scalar @ARGV; $i++) {
1080
    push @fh, 
1081
         make_file_list([ $ARGV[$i] ], \%Error_Codes, \@Errors, \%Ignored);
1082
    @{$files_for_set[$i]} = @file_list;
1083
    if ($opt_exclude_list_file) {
1084
        # note: process_exclude_list_file() references global @file_list
1085
        process_exclude_list_file($opt_exclude_list_file, 
1086
                                 \%Exclude_Dir,
1087
                                 \%Ignored);
1088
    }
1089
    @file_list = ();
1090
}
1091
# 1}}}
1092
# Step 5:  Remove duplicate files.             {{{1
1093
#
1094
my %Language           = ();
1095
my %unique_source_file = ();
1096
my $n_set = 0;
1097
foreach my $FH (@fh) {  # loop over each pair of file sets
1098
    ++$n_set;
1099
    remove_duplicate_files($FH, 
1100
                               \%{$Language{$FH}}               , 
1101
                               \%{$unique_source_file{$FH}}     , 
1102
                          \%Error_Codes                         , 
1103
                               \@Errors                         , 
1104
                               \%Ignored                        );
1105
    printf "%2d: %8d unique file%s.                          \r", 
1106
        $n_set,
1107
        plural_form(scalar keys %unique_source_file) 
1108
        unless $opt_quiet;
1109
}
1110
# 1}}}
1111
# Step 6:  Count code, comments, blank lines.  {{{1
1112
#
1113
my %Results_by_Language = ();
1114
my %Results_by_File     = ();
1115
my %Delta_by_Language   = ();
1116
my %Delta_by_File       = ();
1117
 
1118
foreach (my $F = 0; $F < scalar @fh - 1; $F++) { 
1119
    # loop over file sets; do diff between set $F to $F+1
1120
 
1121
    my $nCounted = 0;
1122
 
1123
    my @file_pairs    = ();
1124
    my @files_added   = ();
1125
    my @files_removed = ();
1126
 
1127
    align_by_pairs(\%{$unique_source_file{$fh[$F  ]}}    , # in
1128
                   \%{$unique_source_file{$fh[$F+1]}}    , # in
1129
                   \@files_added                         , # out
1130
                   \@files_removed                       , # out
1131
                   \@file_pairs                          , # out
1132
                   );
1133
    my %already_counted = (); # already_counted{ filename } = 1
1134
 
1135
    if (!@file_pairs) {
1136
        # Special case where all files were either added or deleted.
1137
        # In this case, one of these arrays will be empty: 
1138
        #   @files_added, @files_removed
1139
        # so loop over both to cover both cases.
1140
        my $status = @files_added ? 'added' : 'removed';
1141
        my $offset = @files_added ? 1       : 0        ;
1142
        foreach my $file (@files_added, @files_removed) {
1143
            next unless defined $Language{$fh[$F+$offset]}{$file};
1144
            my $Lang = $Language{$fh[$F+$offset]}{$file};
1145
            next if $Lang eq '(unknown)';
1146
            my ($all_line_count,
1147
                $blank_count   ,
1148
                $comment_count ,
1149
               ) = call_counter($file, $Lang, \@Errors);
1150
            $already_counted{$file} = 1;
1151
            my $code_count = $all_line_count-$blank_count-$comment_count;
1152
            if ($opt_by_file) {
1153
                $Delta_by_File{$file}{'code'   }{$status} += $code_count   ;
1154
                $Delta_by_File{$file}{'blank'  }{$status} += $blank_count  ;
1155
                $Delta_by_File{$file}{'comment'}{$status} += $comment_count;
1156
                $Delta_by_File{$file}{'lang'   }{$status}  = $Lang         ;
1157
                $Delta_by_File{$file}{'nFiles' }{$status} += 1             ;
1158
            }
1159
            $Delta_by_Language{$Lang}{'code'   }{$status} += $code_count   ;
1160
            $Delta_by_Language{$Lang}{'blank'  }{$status} += $blank_count  ;
1161
            $Delta_by_Language{$Lang}{'comment'}{$status} += $comment_count;
1162
            $Delta_by_Language{$Lang}{'nFiles' }{$status} += 1             ;
1163
        }
1164
    }
1165
   #use Data::Dumper::Simple;
1166
   #use Data::Dumper;
1167
   #print Dumper(\@files_added, \@files_removed, \@file_pairs);
1168
    my @alignment = (); # only  used if --diff-alignment
1169
#print "after align_by_pairs:\n";
1170
 
1171
#print "added:\n";
1172
    push @alignment, sprintf "Files added: %d\n", scalar @files_added
1173
        if $opt_diff_alignment;
1174
    foreach my $f (@files_added) {
1175
        next if $already_counted{$f};
1176
#printf "%10s -> %s\n", $f, $Language{$fh[$F+1]}{$f};
1177
        # Don't proceed unless the file (both L and R versions)
1178
        # is in a known language.
1179
        next if $opt_include_lang 
1180
                and not $Include_Language{$Language{$fh[$F+1]}{$f}};
1181
        next if $Language{$fh[$F+1]}{$f} eq "(unknown)";
1182
        next if $Exclude_Language{$Language{$fh[$F+1]}{$f}};
1183
        push @alignment, sprintf "  + %s ; %s\n", $f, $Language{$fh[$F+1]}{$f}
1184
            if $opt_diff_alignment;
1185
        ++$Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'nFiles'}{'added'};
1186
        # Additionally, add contents of file $f to 
1187
        #        Delta_by_File{$f}{comment/blank/code}{'added'}
1188
        #        Delta_by_Language{$lang}{comment/blank/code}{'added'}
1189
        my ($all_line_count,
1190
            $blank_count   ,
1191
            $comment_count ,
1192
           ) = call_counter($f, $Language{$fh[$F+1]}{$f}, \@Errors);
1193
        $Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'comment'}{'added'} +=
1194
            $comment_count;
1195
        $Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'blank'}{'added'}   +=
1196
            $blank_count;
1197
        $Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'code'}{'added'}    +=
1198
            $all_line_count - $blank_count - $comment_count;
1199
        $Delta_by_File{ $f }{'comment'}{'added'} = $comment_count;
1200
        $Delta_by_File{ $f }{'blank'}{'added'}   = $blank_count;
1201
        $Delta_by_File{ $f }{'code'}{'added'}    = 
1202
            $all_line_count - $blank_count - $comment_count;
1203
    }
1204
    push @alignment, "\n";
1205
 
1206
#print "removed:\n";
1207
    push @alignment, sprintf "Files removed: %d\n", scalar @files_removed
1208
        if $opt_diff_alignment;
1209
    foreach my $f (@files_removed) {
1210
        next if $already_counted{$f};
1211
        # Don't proceed unless the file (both L and R versions)
1212
        # is in a known language.
1213
        next if $opt_include_lang 
1214
                and not $Include_Language{$Language{$fh[$F]}{$f}};
1215
        next if $Language{$fh[$F]}{$f} eq "(unknown)";
1216
        next if $Exclude_Language{$Language{$fh[$F]}{$f}};
1217
        ++$Delta_by_Language{ $Language{$fh[$F]}{$f} }{'nFiles'}{'removed'};
1218
        push @alignment, sprintf "  - %s ; %s\n", $f, $Language{$fh[$F]}{$f}
1219
            if $opt_diff_alignment;
1220
#printf "%10s -> %s\n", $f, $Language{$fh[$F  ]}{$f};
1221
        # Additionally, add contents of file $f to 
1222
        #        Delta_by_File{$f}{comment/blank/code}{'removed'}
1223
        #        Delta_by_Language{$lang}{comment/blank/code}{'removed'}
1224
        my ($all_line_count,
1225
            $blank_count   ,
1226
            $comment_count ,
1227
           ) = call_counter($f, $Language{$fh[$F  ]}{$f}, \@Errors);
1228
        $Delta_by_Language{ $Language{$fh[$F  ]}{$f} }{'comment'}{'removed'} +=
1229
            $comment_count;
1230
        $Delta_by_Language{ $Language{$fh[$F  ]}{$f} }{'blank'}{'removed'}   +=
1231
            $blank_count;
1232
        $Delta_by_Language{ $Language{$fh[$F  ]}{$f} }{'code'}{'removed'}    +=
1233
            $all_line_count - $blank_count - $comment_count;
1234
        $Delta_by_File{ $f }{'comment'}{'removed'} = $comment_count;
1235
        $Delta_by_File{ $f }{'blank'}{'removed'}   = $blank_count;
1236
        $Delta_by_File{ $f }{'code'}{'removed'}    = 
1237
            $all_line_count - $blank_count - $comment_count;
1238
    }
1239
    push @alignment, "\n";
1240
 
1241
    my $alignment_pairs_index = scalar @alignment;
1242
    my $n_file_pairs_compared = 0;
1243
    # Don't know ahead of time how many file pairs will be compared
1244
    # since duplicates are weeded out below.  The answer is
1245
    # scalar @file_pairs only if there are no duplicates.
1246
    push @alignment, sprintf "File pairs compared: UPDATE_ME\n"
1247
        if $opt_diff_alignment;
1248
 
1249
    foreach my $pair (@file_pairs) {
1250
        my $file_L = $pair->[0];
1251
        my $file_R = $pair->[1];
1252
        my $Lang_L = $Language{$fh[$F  ]}{$file_L};
1253
        my $Lang_R = $Language{$fh[$F+1]}{$file_R};
1254
#print "main step 6 file_L=$file_L    file_R=$file_R\n";
1255
        ++$nCounted;
1256
        printf "Counting:  %d\r", $nCounted 
1257
            unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate));
1258
        next if $Ignored{$file_L};
1259
        # filter out non-included languages
1260
        if ($opt_include_lang and not $Include_Language{$Lang_L} 
1261
                              and not $Include_Language{$Lang_R}) {
1262
            $Ignored{$file_L} = "--include-lang=$Lang_L";
1263
            $Ignored{$file_R} = "--include-lang=$Lang_R";
1264
            next;
1265
        }
1266
        # filter out excluded or unrecognized languages
1267
        if ($Exclude_Language{$Lang_L} or $Exclude_Language{$Lang_R}) {
1268
            $Ignored{$file_L} = "--exclude-lang=$Lang_L";
1269
            $Ignored{$file_R} = "--exclude-lang=$Lang_R";
1270
            next;
1271
        }
1272
        my $not_Filters_by_Language_Lang_LR = 0;
1273
#print "file_LR = [$file_L] [$file_R]\n";
1274
#print "Lang_LR = [$Lang_L] [$Lang_R]\n";
1275
        if (!(@{$Filters_by_Language{$Lang_L} }) or
1276
            !(@{$Filters_by_Language{$Lang_R} })) {
1277
            $not_Filters_by_Language_Lang_LR = 1;
1278
        }
1279
        if ($not_Filters_by_Language_Lang_LR) {
1280
            if (($Lang_L eq "(unknown)") or ($Lang_R eq "(unknown)")) {
1281
                $Ignored{$fh[$F  ]}{$file_L} = "language unknown (#1)";
1282
                $Ignored{$fh[$F+1]}{$file_R} = "language unknown (#1)";
1283
            } else {
1284
                $Ignored{$fh[$F  ]}{$file_L} = "missing Filters_by_Language{$Lang_L}";
1285
                $Ignored{$fh[$F+1]}{$file_R} = "missing Filters_by_Language{$Lang_R}";
1286
            }
1287
            next;
1288
        }
1289
 
1290
#print "DIFF($file_L, $file_R)\n";
1291
        # step 0: compare the two files' contents 
1292
        chomp ( my @lines_L = read_file($file_L) );
1293
        chomp ( my @lines_R = read_file($file_R) );
1294
        my $language_file_L = "";
1295
        if (defined $Language{$fh[$F]}{$file_L}) {
1296
            $language_file_L = $Language{$fh[$F]}{$file_L};
1297
        } else {
1298
            # files $file_L and $file_R do not contain known language
1299
            next;
1300
        }
1301
        my $contents_are_same = 1;
1302
        if (scalar @lines_L == scalar @lines_R) {
1303
            # same size, must compare line-by-line
1304
            for (my $i = 0; $i < scalar @lines_L; $i++) {
1305
                if ($lines_L[$i] ne $lines_R[$i]) {
1306
                    $contents_are_same = 0;
1307
                    last;
1308
                }
1309
            }
1310
            if ($contents_are_same) {
1311
                ++$Delta_by_Language{$language_file_L}{'nFiles'}{'same'};
1312
            } else {
1313
                ++$Delta_by_Language{$language_file_L}{'nFiles'}{'modified'};
1314
            }
1315
        } else {
1316
            $contents_are_same = 0;
1317
            # different sizes, contents have changed
1318
            ++$Delta_by_Language{$language_file_L}{'nFiles'}{'modified'};
1319
        }
1320
        if ($opt_diff_alignment) {
1321
            my $str =  "$file_L | $file_R ; $language_file_L";
1322
            if ($contents_are_same) {
1323
                push @alignment, "  == $str";
1324
            } else {
1325
                push @alignment, "  != $str";
1326
            }
1327
            ++$n_file_pairs_compared;
1328
        }
1329
 
1330
        # step 1: identify comments in both files
1331
#print "Diff blank removal L language= $Lang_L";
1332
#print " scalar(lines_L)=", scalar @lines_L, "\n";
1333
        my @original_minus_blanks_L 
1334
                    = rm_blanks(  \@lines_L, $Lang_L, \%EOL_Continuation_re);
1335
#print "1: scalar(original_minus_blanks_L)=", scalar @original_minus_blanks_L, "\n";
1336
        @lines_L    = @original_minus_blanks_L;
1337
#print "2: scalar(lines_L)=", scalar @lines_L, "\n";
1338
        @lines_L    = add_newlines(\@lines_L); # compensate for rm_comments()
1339
        @lines_L    = rm_comments( \@lines_L, $Lang_L, $file_L,
1340
                                   \%EOL_Continuation_re);
1341
#print "3: scalar(lines_L)=", scalar @lines_L, "\n";
1342
 
1343
#print "Diff blank removal R language= $Lang_R\n";
1344
        my @original_minus_blanks_R 
1345
                    = rm_blanks(  \@lines_R, $Lang_R, \%EOL_Continuation_re);
1346
        @lines_R    = @original_minus_blanks_R;
1347
        @lines_R    = add_newlines(\@lines_R); # taken away by rm_comments()
1348
        @lines_R    = rm_comments( \@lines_R, $Lang_R, $file_R,
1349
                                   \%EOL_Continuation_re);
1350
 
1351
        my (@diff_LL, @diff_LR, );
1352
        array_diff( $file_L                  ,   # in
1353
                   \@original_minus_blanks_L ,   # in
1354
                   \@lines_L                 ,   # in
1355
                   "comment"                 ,   # in
1356
                   \@diff_LL, \@diff_LR      ,   # out
1357
                   \@Errors);                    # in/out
1358
 
1359
        my (@diff_RL, @diff_RR, );
1360
        array_diff( $file_R                  ,   # in
1361
                   \@original_minus_blanks_R ,   # in
1362
                   \@lines_R                 ,   # in
1363
                   "comment"                 ,   # in
1364
                   \@diff_RL, \@diff_RR      ,   # out
1365
                   \@Errors);                    # in/out
1366
        # each line of each file is now classified as
1367
        # code or comment
1368
 
1369
#use Data::Dumper; 
1370
#print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
1371
#print Dumper("diff_RL", \@diff_RL, "diff_RR", \@diff_RR, );
1372
#die;
1373
        # step 2: separate code from comments for L and R files
1374
        my @code_L = ();
1375
        my @code_R = ();
1376
        my @comm_L = ();
1377
        my @comm_R = ();
1378
        foreach my $line_info (@diff_LL) {
1379
            if      ($line_info->{'type'} eq "code"   ) {
1380
                push @code_L, $line_info->{char};
1381
            } elsif ($line_info->{'type'} eq "comment") {
1382
                push @comm_L, $line_info->{char};
1383
            } else {
1384
                die "Diff unexpected line type ",
1385
                    $line_info->{'type'}, "for $file_L line ",
1386
                    $line_info->{'lnum'};
1387
            }
1388
        }
1389
        foreach my $line_info (@diff_RL) {
1390
            if      ($line_info->{type} eq "code"   ) {
1391
                push @code_R, $line_info->{'char'};
1392
            } elsif ($line_info->{type} eq "comment") {
1393
                push @comm_R, $line_info->{'char'};
1394
            } else {
1395
                die "Diff unexpected line type ",
1396
                    $line_info->{'type'}, "for $file_R line ",
1397
                    $line_info->{'lnum'};
1398
            }
1399
        }
1400
 
1401
        if ($opt_ignore_whitespace) {
1402
            # strip all whitespace from each line of source code
1403
            # and comments then use these stripped arrays in diffs
1404
            foreach (@code_L) { s/\s+//g }
1405
            foreach (@code_R) { s/\s+//g }
1406
            foreach (@comm_L) { s/\s+//g }
1407
            foreach (@comm_R) { s/\s+//g }
1408
        }
1409
        if ($opt_ignore_case) {
1410
            # change all text to lowercase in diffs
1411
            foreach (@code_L) { $_ = lc }
1412
            foreach (@code_R) { $_ = lc }
1413
            foreach (@comm_L) { $_ = lc }
1414
            foreach (@comm_R) { $_ = lc }
1415
        }
1416
        # step 3: compute code diffs
1417
        array_diff("$file_L v. $file_R"   ,   # in
1418
                   \@code_L               ,   # in
1419
                   \@code_R               ,   # in
1420
                   "revision"             ,   # in
1421
                   \@diff_LL, \@diff_LR   ,   # out
1422
                   \@Errors);                 # in/out
1423
#print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
1424
#print Dumper("diff_LR", \@diff_LR);
1425
        foreach my $line_info (@diff_LR) {
1426
            my $status = $line_info->{'desc'}; # same|added|removed|modified
1427
            ++$Delta_by_Language{$Lang_L}{'code'}{$status};
1428
            if ($opt_by_file) {
1429
                ++$Delta_by_File{$file_L}{'code'}{$status};
1430
            }
1431
        }
1432
#use Data::Dumper;
1433
#print Dumper("code diffs:", \@diff_LL, \@diff_LR);
1434
 
1435
        # step 4: compute comment diffs
1436
        array_diff("$file_L v. $file_R"   ,   # in
1437
                   \@comm_L               ,   # in
1438
                   \@comm_R               ,   # in
1439
                   "revision"             ,   # in
1440
                   \@diff_LL, \@diff_LR   ,   # out
1441
                   \@Errors);                 # in/out
1442
#print Dumper("comment diff_LR", \@diff_LR);
1443
        foreach my $line_info (@diff_LR) {
1444
            my $status = $line_info->{'desc'}; # same|added|removed|modified
1445
            ++$Delta_by_Language{$Lang_L}{'comment'}{$status};
1446
            if ($opt_by_file) {
1447
                ++$Delta_by_File{$file_L}{'comment'}{$status};
1448
            }
1449
        }
1450
#print Dumper("comment diffs:", \@diff_LL, \@diff_LR);
1451
#die; here=  need to save original line number in diff result for html display
1452
 
1453
        # step 5: compute difference in blank lines (kind of pointless)
1454
        next if $Lang_L eq '(unknown)' or 
1455
                $Lang_R eq '(unknown)';
1456
        my ($all_line_count_L,
1457
            $blank_count_L   ,
1458
            $comment_count_L ,
1459
           ) = call_counter($file_L, $Lang_L, \@Errors);
1460
 
1461
        my ($all_line_count_R,
1462
            $blank_count_R   ,
1463
            $comment_count_R ,
1464
           ) = call_counter($file_R, $Lang_R, \@Errors);
1465
 
1466
        if ($blank_count_L <  $blank_count_R) {
1467
            my $D = $blank_count_R - $blank_count_L;
1468
            $Delta_by_Language{$Lang_L}{'blank'}{'added'}   += $D;
1469
        } else {
1470
            my $D = $blank_count_L - $blank_count_R;
1471
            $Delta_by_Language{$Lang_L}{'blank'}{'removed'} += $D;
1472
        }
1473
        if ($opt_by_file) {
1474
            if ($blank_count_L <  $blank_count_R) {
1475
                my $D = $blank_count_R - $blank_count_L;
1476
                $Delta_by_File{$file_L}{'blank'}{'added'}   += $D;
1477
            } else {
1478
                my $D = $blank_count_L - $blank_count_R;
1479
                $Delta_by_File{$file_L}{'blank'}{'removed'} += $D;
1480
            }
1481
        }
1482
 
1483
        my $code_count_L = $all_line_count_L-$blank_count_L-$comment_count_L;
1484
        if ($opt_by_file) {
1485
            $Results_by_File{$file_L}{'code'   } = $code_count_L    ;
1486
            $Results_by_File{$file_L}{'blank'  } = $blank_count_L   ;
1487
            $Results_by_File{$file_L}{'comment'} = $comment_count_L ;
1488
            $Results_by_File{$file_L}{'lang'   } = $Lang_L          ;
1489
            $Results_by_File{$file_L}{'nFiles' } = 1                ;
1490
        } else {
1491
            $Results_by_File{$file_L} = 1;  # just keep track of counted files
1492
        }
1493
 
1494
        $Results_by_Language{$Lang_L}{'nFiles'}++;
1495
        $Results_by_Language{$Lang_L}{'code'}    += $code_count_L   ;
1496
        $Results_by_Language{$Lang_L}{'blank'}   += $blank_count_L  ;
1497
        $Results_by_Language{$Lang_L}{'comment'} += $comment_count_L;
1498
    }
1499
    if ($opt_diff_alignment) {
1500
        $alignment[$alignment_pairs_index] =~ s/UPDATE_ME/$n_file_pairs_compared/;
1501
        write_file($opt_diff_alignment, @alignment);
1502
    }
1503
 
1504
}
1505
#use Data::Dumper;
1506
#print Dumper("Delta_by_Language:"  , \%Delta_by_Language);
1507
#print Dumper("Results_by_Language:", \%Results_by_Language);
1508
#print Dumper("Delta_by_File:"      , \%Delta_by_File);
1509
#print Dumper("Results_by_File:"    , \%Results_by_File);
1510
#die;
1511
my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
1512
write_file($opt_ignored, @ignored_reasons   ) if $opt_ignored;
1513
write_file($opt_counted, sort keys %Results_by_File) if $opt_counted;
1514
# 1}}}
1515
# Step 7:  Assemble results.                   {{{1
1516
#
1517
my $end_time = get_time();
1518
printf "%8d file%s ignored.                           \n", 
1519
    plural_form(scalar keys %Ignored) unless $opt_quiet;
1520
print_errors(\%Error_Codes, \@Errors) if @Errors;
1521
if (!%Delta_by_Language) {
1522
    print "Nothing to count.\n";
1523
    exit;
1524
}
1525
 
1526
if ($opt_by_file) {
1527
    @Lines_Out = diff_report($VERSION, get_time() - $start_time,
1528
                            "by file",
1529
                            \%Delta_by_File, \%Scale_Factor);
1530
} else {
1531
    @Lines_Out = diff_report($VERSION, get_time() - $start_time,
1532
                            "by language",
1533
                            \%Delta_by_Language, \%Scale_Factor);
1534
}
1535
 
1536
# 1}}}
1537
} else {
1538
# Step 4:  Separate code from non-code files.  {{{1
1539
my $fh = 0;
1540
if ($opt_list_file) {
1541
    my @list = read_list_file($opt_list_file);
1542
    $fh = make_file_list(\@list, \%Error_Codes, \@Errors, \%Ignored);
1543
} else {
1544
    $fh = make_file_list(\@ARGV, \%Error_Codes, \@Errors, \%Ignored);
1545
    #     make_file_list populates global variable @file_list via call to 
1546
    #     File::Find's find() which in turn calls files()
1547
}
1548
if ($opt_exclude_list_file) {
1549
    # note: process_exclude_list_file() references global @file_list
1550
    process_exclude_list_file($opt_exclude_list_file, 
1551
                             \%Exclude_Dir,
1552
                             \%Ignored);
1553
}
1554
if ($opt_skip_win_hidden and $ON_WINDOWS) {
1555
    my @file_list_minus_hidded = ();
1556
    # eval code to run on Unix without 'missing Win32::File module' error.
1557
    my $win32_file_invocation = '
1558
        use Win32::File;
1559
        foreach my $F (@file_list) {
1560
            my $attr = undef;
1561
            Win32::File::GetAttributes($F, $attr);
1562
            if ($attr & HIDDEN) {
1563
                $Ignored{$F} = "Windows hidden file";
1564
                print "Ignoring $F since it is a Windows hidden file\n" 
1565
                    if $opt_v > 1;
1566
            } else {
1567
                push @file_list_minus_hidded, $F;
1568
            }
1569
        }';
1570
    eval $win32_file_invocation;
1571
    @file_list = @file_list_minus_hidded;
1572
}
1573
#printf "%8d file%s excluded.                     \n", 
1574
#   plural_form(scalar keys %Ignored) 
1575
#   unless $opt_quiet;
1576
# die print ": ", join("\n: ", @file_list), "\n";
1577
# 1}}}
1578
# Step 5:  Remove duplicate files.             {{{1
1579
#
1580
my %Language           = ();
1581
my %unique_source_file = ();
1582
remove_duplicate_files($fh                          ,   # in 
1583
                           \%Language               ,   # out
1584
                           \%unique_source_file     ,   # out
1585
                      \%Error_Codes                 ,   # in
1586
                           \@Errors                 ,   # out
1587
                           \%Ignored                );  # out
1588
printf "%8d unique file%s.                              \n", 
1589
    plural_form(scalar keys %unique_source_file) 
1590
    unless $opt_quiet;
1591
# 1}}}
1592
# Step 6:  Count code, comments, blank lines.  {{{1
1593
#
1594
 
1595
my %Results_by_Language = ();
1596
my %Results_by_File     = ();
1597
my $nCounted = 0;
1598
foreach my $file (sort keys %unique_source_file) {
1599
    ++$nCounted;
1600
    printf "Counting:  %d\r", $nCounted 
1601
        unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate));
1602
    next if $Ignored{$file};
1603
    if ($opt_include_lang and not $Include_Language{$Language{$file}}) {
1604
        $Ignored{$file} = "--include-lang=$Language{$file}";
1605
        next;
1606
    }
1607
    if ($Exclude_Language{$Language{$file}}) {
1608
        $Ignored{$file} = "--exclude-lang=$Language{$file}";
1609
        next;
1610
    }
1611
    my $Filters_by_Language_Language_file = ! @{$Filters_by_Language{$Language{$file}} };
1612
    if ($Filters_by_Language_Language_file) {
1613
        if ($Language{$file} eq "(unknown)") {
1614
            $Ignored{$file} = "language unknown (#1)";
1615
        } else {
1616
            $Ignored{$file} = "missing Filters_by_Language{$Language{$file}}";
1617
        }
1618
        next;
1619
    }
1620
 
1621
    my ($all_line_count,
1622
        $blank_count   ,
1623
        $comment_count ,
1624
       ) = call_counter($file, $Language{$file}, \@Errors);
1625
    my $code_count = $all_line_count - $blank_count - $comment_count;
1626
    if ($opt_by_file) {
1627
        $Results_by_File{$file}{'code'   } = $code_count     ;
1628
        $Results_by_File{$file}{'blank'  } = $blank_count    ;
1629
        $Results_by_File{$file}{'comment'} = $comment_count  ;
1630
        $Results_by_File{$file}{'lang'   } = $Language{$file};
1631
        $Results_by_File{$file}{'nFiles' } = 1;
1632
    } else {
1633
        $Results_by_File{$file} = 1;  # just keep track of counted files
1634
    }
1635
 
1636
    $Results_by_Language{$Language{$file}}{'nFiles'}++;
1637
    $Results_by_Language{$Language{$file}}{'code'}    += $code_count   ;
1638
    $Results_by_Language{$Language{$file}}{'blank'}   += $blank_count  ;
1639
    $Results_by_Language{$Language{$file}}{'comment'} += $comment_count;
1640
}
1641
my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
1642
write_file($opt_ignored, @ignored_reasons   ) if $opt_ignored;
1643
write_file($opt_counted, sort keys %Results_by_File) if $opt_counted;
1644
# 1}}}
1645
# Step 7:  Assemble results.                   {{{1
1646
#
1647
my $end_time = get_time();
1648
printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored) 
1649
    unless $opt_quiet;
1650
print_errors(\%Error_Codes, \@Errors) if @Errors;
1651
exit unless %Results_by_Language;
1652
 
1653
generate_sql($end_time - $start_time,
1654
            \%Results_by_File, \%Scale_Factor) if $opt_sql;
1655
 
1656
exit if $skip_generate_report;
1657
if      ($opt_by_file_by_lang) {
1658
    push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
1659
                                      "by file",
1660
                                      \%Results_by_File,    \%Scale_Factor);
1661
    push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
1662
                                      "by language",
1663
                                      \%Results_by_Language, \%Scale_Factor);
1664
} elsif ($opt_by_file) {
1665
    push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
1666
                                      "by file",
1667
                                      \%Results_by_File,    \%Scale_Factor);
1668
} else {
1669
    push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
1670
                                      "by language",
1671
                                      \%Results_by_Language, \%Scale_Factor);
1672
}
1673
# 1}}}
1674
}
1675
if ($opt_report_file) { write_file($opt_report_file, @Lines_Out); } 
1676
else                  { print "\n", join("\n", @Lines_Out), "\n"; }
1677
if ($opt_count_diff) {
1678
    ++$opt_count_diff;
1679
    exit if $opt_count_diff > 3;
1680
    goto Top_of_Processing_Loop;
1681
}
1682
 
1683
sub process_exclude_list_file {              # {{{1
1684
    my ($list_file      , # in
1685
        $rh_exclude_dir , # out
1686
        $rh_ignored     , # out
1687
       ) = @_;
1688
    # note: references global @file_list
1689
    print "-> process_exclude_list_file($list_file)\n" if $opt_v > 2;
1690
    # reject a specific set of files and/or directories
1691
    my @reject_list   = read_list_file($list_file);
1692
    my @file_reject_list = ();
1693
    foreach my $F_or_D (@reject_list) {
1694
        if (is_dir($F_or_D)) {
1695
            $rh_exclude_dir->{$F_or_D} = 1;
1696
        } elsif (is_file($F_or_D)) {
1697
            push @file_reject_list, $F_or_D;
1698
        }
1699
    }
1700
 
1701
    # Normalize file names for better comparison.
1702
    my %normalized_input   = normalize_file_names(@file_list);
1703
    my %normalized_reject  = normalize_file_names(@file_reject_list);
1704
    my %normalized_exclude = normalize_file_names(keys %{$rh_exclude_dir});
1705
    foreach my $F (keys %normalized_input) {
1706
        if ($normalized_reject{$F} or is_excluded($F, \%normalized_exclude)) {
1707
            my $orig_F = $normalized_input{$F};
1708
            $rh_ignored->{$orig_F} = "listed in exclusion file $opt_exclude_list_file";
1709
            print "Ignoring $orig_F because it appears in $opt_exclude_list_file\n" 
1710
                if $opt_v > 1;
1711
        }
1712
    }
1713
    print "<- process_exclude_list_file\n" if $opt_v > 2;
1714
} # 1}}}
1715
sub combine_results {                        # {{{1
1716
    # returns 1 if the inputs are categorized by language
1717
    #         0 if no identifiable language was found
1718
    my ($ra_report_files, # in
1719
        $report_type    , # in  "by language" or "by report file"
1720
        $rhh_count      , # out count{TYPE}{nFiles|code|blank|comment|scaled}
1721
        $rhaa_Filters_by_Language , # in
1722
       ) = @_;
1723
 
1724
    print "-> combine_results(report_type=$report_type)\n" if $opt_v > 2;
1725
    my $found_language = 0;
1726
 
1727
    foreach my $file (@{$ra_report_files}) {
1728
        my $IN = new IO::File $file, "r";
1729
        if (!defined $IN) {
1730
            warn "Unable to read $file; ignoring.\n";
1731
            next;
1732
        }
1733
        while (<$IN>) {
1734
            next if /^(http|Language|SUM|-----)/;
1735
            if (!$opt_by_file  and
1736
                m{^(.*?)\s+         # language
1737
                   (\d+)\s+         # files
1738
                   (\d+)\s+         # blank
1739
                   (\d+)\s+         # comments
1740
                   (\d+)\s+         # code
1741
                   (                #    next four entries missing with -nno3
1742
                   x\s+             # x
1743
                   \d+\.\d+\s+      # scale
1744
                   =\s+             # =
1745
                   (\d+\.\d+)\s*    # scaled code
1746
                   )?
1747
                   $}x) {
1748
                if ($report_type eq "by language") {
1749
                    next unless @{$rhaa_Filters_by_Language->{$1}};
1750
                    # above test necessary to avoid trying to sum reports
1751
                    # of reports (which have no language breakdown).
1752
                    $found_language = 1;
1753
                    $rhh_count->{$1   }{'nFiles' } += $2;
1754
                    $rhh_count->{$1   }{'blank'  } += $3;
1755
                    $rhh_count->{$1   }{'comment'} += $4;
1756
                    $rhh_count->{$1   }{'code'   } += $5;
1757
                    $rhh_count->{$1   }{'scaled' } += $7 if $opt_3;
1758
                } else {
1759
                    $rhh_count->{$file}{'nFiles' } += $2;
1760
                    $rhh_count->{$file}{'blank'  } += $3;
1761
                    $rhh_count->{$file}{'comment'} += $4;
1762
                    $rhh_count->{$file}{'code'   } += $5;
1763
                    $rhh_count->{$file}{'scaled' } += $7 if $opt_3;
1764
                }
1765
            } elsif ($opt_by_file  and
1766
                m{^(.*?)\s+         # language
1767
                   (\d+)\s+         # blank
1768
                   (\d+)\s+         # comments
1769
                   (\d+)\s+         # code
1770
                   (                #    next four entries missing with -nno3
1771
                   x\s+             # x
1772
                   \d+\.\d+\s+      # scale
1773
                   =\s+             # =
1774
                   (\d+\.\d+)\s*    # scaled code
1775
                   )?
1776
                   $}x) {
1777
                if ($report_type eq "by language") {
1778
                    next unless %{$rhaa_Filters_by_Language->{$1}};
1779
                    # above test necessary to avoid trying to sum reports
1780
                    # of reports (which have no language breakdown).
1781
                    $found_language = 1;
1782
                    $rhh_count->{$1   }{'nFiles' } +=  1;
1783
                    $rhh_count->{$1   }{'blank'  } += $2;
1784
                    $rhh_count->{$1   }{'comment'} += $3;
1785
                    $rhh_count->{$1   }{'code'   } += $4;
1786
                    $rhh_count->{$1   }{'scaled' } += $6 if $opt_3;
1787
                } else {
1788
                    $rhh_count->{$file}{'nFiles' } +=  1;
1789
                    $rhh_count->{$file}{'blank'  } += $2;
1790
                    $rhh_count->{$file}{'comment'} += $3;
1791
                    $rhh_count->{$file}{'code'   } += $4;
1792
                    $rhh_count->{$file}{'scaled' } += $6 if $opt_3;
1793
                }
1794
            }
1795
        }
1796
    }
1797
    print "<- combine_results\n" if $opt_v > 2;
1798
    return $found_language;
1799
} # 1}}}
1800
sub compute_denominator {                    # {{{1
1801
    my ($method, $nCode, $nComment, $nBlank, ) = @_;
1802
    print "-> compute_denominator\n" if $opt_v > 2;
1803
    my %den        = ( "c" => $nCode );
1804
       $den{"cm"}  = $den{"c"}  + $nComment;
1805
       $den{"cmb"} = $den{"cm"} + $nBlank;
1806
       $den{"cb"}  = $den{"c"}  + $nBlank;
1807
 
1808
    print "<- compute_denominator\n" if $opt_v > 2;
1809
    return $den{ $method };
1810
} # 1}}}
1811
sub diff_report     {                        # {{{1
1812
    # returns an array of lines containing the results
1813
    print "-> diff_report\n" if $opt_v > 2;
1814
 
1815
    if ($opt_xml or $opt_yaml) {
1816
        print "<- diff_report\n" if $opt_v > 2;
1817
        return diff_xml_yaml_report(@_) 
1818
    } elsif ($opt_csv) {
1819
        print "<- diff_report\n" if $opt_v > 2;
1820
        return diff_csv_report(@_) 
1821
    }
1822
 
1823
    my ($version    , # in
1824
        $elapsed_sec, # in
1825
        $report_type, # in  "by language" | "by report file" | "by file"
1826
        $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
1827
        $rh_scale   , # in
1828
       ) = @_;
1829
 
1830
#use Data::Dumper;
1831
#print "diff_report: ", Dumper($rhhh_count), "\n";
1832
    my @results       = ();
1833
 
1834
    my $languages     = ();
1835
    my %sum           = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed}
1836
    my $max_len       = 0;
1837
    foreach my $language (keys %{$rhhh_count}) {
1838
        foreach my $V (qw(nFiles blank comment code)) {
1839
            foreach my $S (qw(added same modified removed)) {
1840
                $rhhh_count->{$language}{$V}{$S} = 0 unless
1841
                    defined $rhhh_count->{$language}{$V}{$S};
1842
                $sum{$V}{$S}  += $rhhh_count->{$language}{$V}{$S};
1843
            }
1844
        }
1845
        $max_len      = length($language) if length($language) > $max_len;
1846
    }
1847
    my $column_1_offset = 0;
1848
       $column_1_offset = $max_len - 17 if $max_len > 17;
1849
    $elapsed_sec = 0.5 unless $elapsed_sec;
1850
 
1851
    my $spacing_0 = 23;
1852
    my $spacing_1 = 13;
1853
    my $spacing_2 =  9;
1854
    my $spacing_3 = 17;
1855
    if (!$opt_3) {
1856
        $spacing_1 = 19;
1857
        $spacing_2 = 14;
1858
        $spacing_3 = 28;
1859
    }
1860
    $spacing_0 += $column_1_offset;
1861
    $spacing_1 += $column_1_offset;
1862
    $spacing_3 += $column_1_offset;
1863
    my %Format = (
1864
        '1' => { 'xml' => 'name="%s" ',
1865
                 'txt' => "\%-${spacing_0}s ",
1866
               },
1867
        '2' => { 'xml' => 'name="%s" ',
1868
                 'txt' => "\%-${spacing_3}s ",
1869
               },
1870
        '3' => { 'xml' => 'files_count="%d" ',
1871
                 'txt' => '%5d ',
1872
               },
1873
        '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
1874
                 'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
1875
               },
1876
        '5' => { 'xml' => 'blank="%.2f" comment="%.2f" code="%d" ',
1877
                 'txt' => "\%3.2f \%3.2f \%${spacing_2}d",
1878
               },
1879
        '6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
1880
                 'txt' => ' x %6.2f = %14.2f',
1881
               },
1882
    );
1883
    my $Style = "txt";
1884
       $Style = "xml" if $opt_xml ;
1885
       $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
1886
       $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
1887
 
1888
    my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
1889
       $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset) 
1890
            if (!$opt_3) and (68 + $column_1_offset) > 79;
1891
    my $data_line  = "";
1892
    my $first_column;
1893
    my $BY_LANGUAGE = 0;
1894
    my $BY_FILE     = 0;
1895
    if      ($report_type eq "by language") {
1896
        $first_column = "Language";
1897
        $BY_LANGUAGE  = 1;
1898
    } elsif ($report_type eq "by file")     {
1899
        $first_column = "File";
1900
        $BY_FILE      = 1;
1901
    } else {
1902
        $first_column = "Report File";
1903
    }
1904
 
1905
    my $header_line  = sprintf "%s v %s", $URL, $version;
1906
    my $sum_files    = 1;
1907
    my $sum_lines    = 1;
1908
       $header_line .= sprintf("  T=%.2f s (%.1f files/s, %.1f lines/s)",
1909
                        $elapsed_sec           ,
1910
                        $sum_files/$elapsed_sec,
1911
                        $sum_lines/$elapsed_sec) unless $opt_sum_reports;
1912
    if ($Style eq "txt") {
1913
        push @results, output_header($header_line, $hyphen_line, $BY_FILE);
1914
    } elsif ($Style eq "csv") {
1915
        die "csv";
1916
    }
1917
 
1918
    # column headers
1919
    if (!$opt_3 and $BY_FILE) {
1920
        my $spacing_n = $spacing_1 - 11;
1921
        $data_line  = sprintf "%-${spacing_n}s" , $first_column;
1922
    } else {
1923
        $data_line  = sprintf "%-${spacing_1}s ", $first_column;
1924
    }
1925
    if ($BY_FILE) {
1926
        $data_line .= sprintf "%${spacing_2}s"   , ""     ;
1927
    } else {
1928
        $data_line .= sprintf "%${spacing_2}s "  , "files";
1929
    }
1930
    my $PCT_symbol = "";
1931
       $PCT_symbol = " \%" if $opt_by_percent;
1932
    $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
1933
        "blank${PCT_symbol}"         ,
1934
        "comment${PCT_symbol}"       ,
1935
        "code";
1936
 
1937
    if ($Style eq "txt") {
1938
        push @results, $data_line;
1939
        push @results, $hyphen_line;
1940
    }
1941
 
1942
####foreach my $lang_or_file (keys %{$rhhh_count}) {
1943
####    $rhhh_count->{$lang_or_file}{'code'} = 0 unless 
1944
####        defined $rhhh_count->{$lang_or_file}{'code'};
1945
####}
1946
    foreach my $lang_or_file (sort {
1947
                                 $rhhh_count->{$b}{'code'} <=>
1948
                                 $rhhh_count->{$a}{'code'}
1949
                               }
1950
                          keys %{$rhhh_count}) {
1951
 
1952
        if ($BY_FILE) {
1953
            push @results, rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
1954
        } else {
1955
            push @results, $lang_or_file;
1956
        }
1957
        foreach my $S (qw(same modified added removed)) {
1958
            my $indent = $spacing_1 - 2;
1959
            my $line .= sprintf " %-${indent}s", $S;
1960
            if ($BY_FILE) {
1961
                $line .= sprintf "   ";
1962
            } else {
1963
                $line .= sprintf "  %${spacing_2}s", $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
1964
            }
1965
            if ($opt_by_percent) {
1966
                my $DEN = compute_denominator($opt_by_percent  ,
1967
                    $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
1968
                    $rhhh_count->{$lang_or_file}{'comment'}{$S},
1969
                    $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
1970
                if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
1971
                    $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
1972
                        $rhhh_count->{$lang_or_file}{'blank'}{$S}   / $DEN * 100,
1973
                        $rhhh_count->{$lang_or_file}{'comment'}{$S} / $DEN * 100,
1974
                        $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
1975
                } else {
1976
                    $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
1977
                        0.0, 0.0, $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
1978
                }
1979
            } else {
1980
                $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
1981
                    $rhhh_count->{$lang_or_file}{'blank'}{$S}   ,
1982
                    $rhhh_count->{$lang_or_file}{'comment'}{$S} ,
1983
                    $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
1984
            }
1985
            push @results, $line;
1986
        }
1987
    }
1988
    push @results, "-" x 79;
1989
    push @results, "SUM:";
1990
    foreach my $S (qw(same modified added removed)) {
1991
        my $indent = $spacing_1 - 2;
1992
        my $line .= sprintf " %-${indent}s", $S;
1993
            if ($BY_FILE) {
1994
                $line .= sprintf "   ";
1995
            } else {
1996
                $line .= sprintf "  %${spacing_2}s", $sum{'nFiles'}{$S};
1997
            }
1998
        if ($opt_by_percent) {
1999
            my $DEN = compute_denominator($opt_by_percent,
2000
                $sum{'code'}{$S}, $sum{'comment'}{$S}, $sum{'blank'}{$S});
2001
            if ($sum{'code'}{$S} > 0) {
2002
                $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
2003
                    $sum{'blank'}{$S}   / $DEN * 100,
2004
                    $sum{'comment'}{$S} / $DEN * 100,
2005
                    $sum{'code'}{$S}    ;
2006
            } else {
2007
                $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
2008
                    0.0, 0.0, $sum{'code'}{$S}    ;
2009
            }
2010
        } else {
2011
            $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
2012
                $sum{'blank'}{$S}   ,
2013
                $sum{'comment'}{$S} ,
2014
                $sum{'code'}{$S}    ;
2015
        }
2016
        push @results, $line;
2017
    }
2018
    push @results, "-" x 79;
2019
    write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
2020
    print "<- diff_report\n" if $opt_v > 2;
2021
 
2022
    return @results;
2023
} # 1}}}
2024
sub xml_or_yaml_header {                     # {{{1
2025
    my ($URL, $version, $elapsed_sec, $sum_files, $sum_lines, $by_file) = @_;
2026
    print "-> xml_or_yaml_header\n" if $opt_v > 2;
2027
    my $header      = "";
2028
    my $file_rate   = $sum_files/$elapsed_sec;
2029
    my $line_rate   = $sum_lines/$elapsed_sec;
2030
    my $type        = ""; 
2031
       $type        = "diff_" if $opt_diff;
2032
    my $report_file = "";
2033
    if ($opt_report_file) {
2034
        if ($opt_sum_reports) {
2035
            if ($by_file) {
2036
                $report_file = "  <report_file>$opt_report_file.file</report_file>"
2037
            } else {
2038
                $report_file = "  <report_file>$opt_report_file.lang</report_file>"
2039
            }
2040
        } else {
2041
            $report_file = "  <report_file>$opt_report_file</report_file>"
2042
        }
2043
    }
2044
    if ($opt_xml) {
2045
        $header = "<?xml version=\"1.0\"?>";
2046
        $header .= "\n<?xml-stylesheet type=\"text/xsl\" href=\"" . $opt_xsl . "\"?>" if $opt_xsl;
2047
        $header .= "<${type}results>
2048
<header>
2049
  <cloc_url>$URL</cloc_url>
2050
  <cloc_version>$version</cloc_version>
2051
  <elapsed_seconds>$elapsed_sec</elapsed_seconds>
2052
  <n_files>$sum_files</n_files>
2053
  <n_lines>$sum_lines</n_lines>
2054
  <files_per_second>$file_rate</files_per_second>
2055
  <lines_per_second>$line_rate</lines_per_second>";
2056
        $header .= "\n$report_file"
2057
            if $opt_report_file;
2058
        $header .= "\n</header>";
2059
    } elsif ($opt_yaml) {
2060
        $header = "---\n# $URL
2061
header :
2062
  cloc_url           : http://cloc.sourceforge.net
2063
  cloc_version       : $version
2064
  elapsed_seconds    : $elapsed_sec
2065
  n_files            : $sum_files
2066
  n_lines            : $sum_lines
2067
  files_per_second   : $file_rate
2068
  lines_per_second   : $line_rate";
2069
        if ($opt_report_file) {
2070
            if ($opt_sum_reports) {
2071
                if ($by_file) {
2072
                    $header .= "\n  report_file        : $opt_report_file.file"
2073
                } else {
2074
                    $header .= "\n  report_file        : $opt_report_file.lang"
2075
                }
2076
            } else {
2077
                $header .= "\n  report_file        : $opt_report_file";
2078
            }
2079
        }
2080
    }
2081
    print "<- xml_or_yaml_header\n" if $opt_v > 2;
2082
    return $header;
2083
} # 1}}}
2084
sub diff_xml_yaml_report {                   # {{{1
2085
    # returns an array of lines containing the results
2086
    my ($version    , # in
2087
        $elapsed_sec, # in
2088
        $report_type, # in  "by language" | "by report file" | "by file"
2089
        $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
2090
        $rh_scale   , # in
2091
       ) = @_;
2092
    print "-> diff_xml_yaml_report\n" if $opt_v > 2;
2093
 
2094
#print "diff_report: ", Dumper($rhhh_count), "\n";
2095
    my @results       = ();
2096
 
2097
    my $languages     = ();
2098
    my %sum           = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed}
2099
 
2100
    my $sum_files = 0;
2101
    my $sum_lines = 0;
2102
    foreach my $language (keys %{$rhhh_count}) {
2103
        foreach my $V (qw(nFiles blank comment code)) {
2104
            foreach my $S (qw(added same modified removed)) {
2105
                $rhhh_count->{$language}{$V}{$S} = 0 unless
2106
                    defined $rhhh_count->{$language}{$V}{$S};
2107
                $sum{$V}{$S}  += $rhhh_count->{$language}{$V}{$S};
2108
                if ($V eq "nFiles") {
2109
                    $sum_files += $rhhh_count->{$language}{$V}{$S};
2110
                } else {
2111
                    $sum_lines += $rhhh_count->{$language}{$V}{$S};
2112
                }
2113
            }
2114
        }
2115
    }
2116
    $elapsed_sec = 0.5 unless $elapsed_sec;
2117
 
2118
    my $data_line   = "";
2119
    my $BY_LANGUAGE = 0;
2120
    my $BY_FILE     = 0;
2121
    if      ($report_type eq "by language") {
2122
        $BY_LANGUAGE  = 1;
2123
    } elsif ($report_type eq "by file")     {
2124
        $BY_FILE      = 1;
2125
    }
2126
 
2127
    if (!$ALREADY_SHOWED_HEADER) {
2128
        push @results,
2129
              xml_or_yaml_header($URL, $version, $elapsed_sec, 
2130
                                 $sum_files, $sum_lines, $BY_FILE);
2131
        $ALREADY_SHOWED_HEADER = 1;
2132
    }
2133
 
2134
    foreach my $S (qw(same modified added removed)) {
2135
        if ($opt_xml) {
2136
            push @results, "  <$S>";
2137
        } elsif ($opt_yaml) {
2138
            push @results, "$S :";
2139
        }
2140
########foreach my $lang_or_file (keys %{$rhhh_count}) {
2141
########    $rhhh_count->{$lang_or_file}{'code'} = 0 unless 
2142
########        defined $rhhh_count->{$lang_or_file}{'code'};
2143
########}
2144
        foreach my $lang_or_file (sort {
2145
                                     $rhhh_count->{$b}{'code'} <=>
2146
                                     $rhhh_count->{$a}{'code'}
2147
                                   }
2148
                              keys %{$rhhh_count}) {
2149
            my $L = "";
2150
            if ($opt_xml) {
2151
                if ($BY_FILE) {
2152
                    $L .= sprintf "    <file name=\"%s\" files_count=\"1\" ", 
2153
                        xml_metachars(
2154
                            rm_leading_tempdir($lang_or_file, \%TEMP_DIR));
2155
                } else {
2156
                    $L .= sprintf "    <language name=\"%s\" files_count=\"%d\" ",
2157
                            $lang_or_file ,
2158
                            $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
2159
                }
2160
                if ($opt_by_percent) {
2161
                  my $DEN = compute_denominator($opt_by_percent            ,
2162
                                $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
2163
                                $rhhh_count->{$lang_or_file}{'comment'}{$S},
2164
                                $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
2165
                  foreach my $T (qw(blank comment)) {
2166
                      if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
2167
                        $L .= sprintf "%s=\"%.2f\" ", 
2168
                                $T, $rhhh_count->{$lang_or_file}{$T}{$S} / $DEN * 100;
2169
                      } else {
2170
                        $L .= sprintf "%s=\"0.0\" ", $T;
2171
                      }
2172
                  }
2173
                  foreach my $T (qw(code)) {
2174
                      $L .= sprintf "%s=\"%d\" ", 
2175
                              $T, $rhhh_count->{$lang_or_file}{$T}{$S};
2176
                  }
2177
                } else {
2178
                  foreach my $T (qw(blank comment code)) {
2179
                      $L .= sprintf "%s=\"%d\" ", 
2180
                              $T, $rhhh_count->{$lang_or_file}{$T}{$S};
2181
                  }
2182
                }
2183
                push @results, $L . "/>";
2184
            } elsif ($opt_yaml) {
2185
                if ($BY_FILE) {
2186
                    push @results, sprintf "  - file : %s", 
2187
                                   rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
2188
                    push @results, sprintf "    files_count : 1", 
2189
                } else {
2190
                    push @results, sprintf "  - language : %s", $lang_or_file;
2191
                    push @results, sprintf "    files_count : %d", 
2192
                            $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
2193
                }
2194
                if ($opt_by_percent) {
2195
                    my $DEN = compute_denominator($opt_by_percent  ,
2196
                        $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
2197
                        $rhhh_count->{$lang_or_file}{'comment'}{$S},
2198
                        $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
2199
                    foreach my $T (qw(blank comment)) {
2200
                        if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
2201
                            push @results, sprintf "    %s : %.2f", 
2202
                                    $T, $rhhh_count->{$lang_or_file}{$T}{$S} / $DEN * 100;
2203
                        } else {
2204
                            push @results, sprintf "    %s : 0.0", $T;
2205
                        }
2206
                    }
2207
                    foreach my $T (qw(code)) {
2208
                        push @results, sprintf "    %s : %d", 
2209
                                $T, $rhhh_count->{$lang_or_file}{$T}{$S};
2210
                    }
2211
                } else {
2212
                    foreach my $T (qw(blank comment code)) {
2213
                        push @results, sprintf "    %s : %d", 
2214
                                $T, $rhhh_count->{$lang_or_file}{$T}{$S};
2215
                    }
2216
                }
2217
            }
2218
        }
2219
 
2220
        if ($opt_xml) {
2221
            my $L = sprintf "    <total sum_files=\"%d\" ", $sum{'nFiles'}{$S};
2222
            if ($opt_by_percent) {
2223
              my $DEN = compute_denominator($opt_by_percent,
2224
                            $sum{'code'}{$S}   , 
2225
                            $sum{'comment'}{$S}, 
2226
                            $sum{'blank'}{$S}  );
2227
              foreach my $V (qw(blank comment)) {
2228
                  if ($sum{'code'}{$S} > 0) {
2229
                      $L .= sprintf "%s=\"%.2f\" ", $V, $sum{$V}{$S} / $DEN * 100;
2230
                  } else {
2231
                      $L .= sprintf "%s=\"0.0\" ", $V;
2232
                  }
2233
              }
2234
              foreach my $V (qw(code)) {
2235
                  $L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
2236
              }
2237
            } else {
2238
              foreach my $V (qw(blank comment code)) {
2239
                  $L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
2240
              }
2241
            }
2242
            push @results, $L . "/>";
2243
            push @results, "  </$S>";
2244
        } elsif ($opt_yaml) {
2245
            push @results, sprintf "%s_total :\n    sum_files : %d", 
2246
                                $S, $sum{'nFiles'}{$S};
2247
            if ($opt_by_percent) {
2248
                my $DEN = compute_denominator($opt_by_percent    ,
2249
                                              $sum{'code'}{$S}   ,
2250
                                              $sum{'comment'}{$S},
2251
                                              $sum{'blank'}{$S}  );
2252
                foreach my $V (qw(blank comment)) {
2253
                    if ($sum{'code'}{$S} > 0) {
2254
                        push @results, sprintf "    %s : %.2f", $V, $sum{$V}{$S} / $DEN * 100;
2255
                    } else {
2256
                        push @results, sprintf "    %s : 0.0", $V;
2257
                    }
2258
                }
2259
                foreach my $V (qw(code)) {
2260
                    push @results, sprintf "    %s : %d", $V, $sum{$V}{$S};
2261
                }
2262
            } else {
2263
                foreach my $V (qw(blank comment code)) {
2264
                    push @results, sprintf "    %s : %d", $V, $sum{$V}{$S};
2265
                }
2266
            }
2267
        }
2268
    }
2269
 
2270
    if ($opt_xml) {
2271
        push @results, "</diff_results>";
2272
    }
2273
    write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
2274
    print "<- diff_xml_yaml_report\n" if $opt_v > 2;
2275
    return @results;
2276
} # 1}}}
2277
sub diff_csv_report {                        # {{{1
2278
    # returns an array of lines containing the results
2279
    my ($version    , # in
2280
        $elapsed_sec, # in
2281
        $report_type, # in  "by language" | "by report file" | "by file"
2282
        $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
2283
        $rh_scale   , # in  unused
2284
       ) = @_;
2285
    print "-> diff_csv_report\n" if $opt_v > 2;
2286
 
2287
#use Data::Dumper;
2288
#print "diff_csv_report: ", Dumper($rhhh_count), "\n";
2289
#die;
2290
    my @results       = ();
2291
    my $languages     = ();
2292
 
2293
    my $data_line   = "";
2294
    my $BY_LANGUAGE = 0;
2295
    my $BY_FILE     = 0;
2296
    if      ($report_type eq "by language") {
2297
        $BY_LANGUAGE  = 1;
2298
    } elsif ($report_type eq "by file")     {
2299
        $BY_FILE      = 1;
2300
    }
2301
    my $DELIM = ",";
2302
       $DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
2303
 
2304
    $elapsed_sec = 0.5 unless $elapsed_sec;
2305
 
2306
    my $line = "Language${DELIM} ";
2307
       $line = "File${DELIM} " if $BY_FILE;
2308
    foreach my $item (qw(files blank comment code)) {
2309
        next if $BY_FILE and $item eq 'files';
2310
        foreach my $symbol ( '==', '!=', '+', '-', ) {
2311
            $line .= "$symbol $item${DELIM} ";
2312
        }
2313
    }
2314
    $line .= "\"$URL v $version T=$elapsed_sec s\"";
2315
    push @results, $line;
2316
 
2317
    foreach my $lang_or_file (keys %{$rhhh_count}) {
2318
        $rhhh_count->{$lang_or_file}{'code'}{'added'} = 0 unless 
2319
            defined $rhhh_count->{$lang_or_file}{'code'};
2320
    }
2321
    foreach my $lang_or_file (sort {
2322
                                 $rhhh_count->{$b}{'code'} <=>
2323
                                 $rhhh_count->{$a}{'code'}
2324
                               }
2325
                          keys %{$rhhh_count}) {
2326
        if ($BY_FILE) {
2327
            $line = rm_leading_tempdir($lang_or_file, \%TEMP_DIR) . "$DELIM ";
2328
        } else {
2329
            $line = $lang_or_file . "${DELIM} ";
2330
        }
2331
        if ($opt_by_percent) {
2332
          foreach my $item (qw(nFiles)) {
2333
              next if $BY_FILE and $item eq 'nFiles';
2334
              foreach my $symbol (qw(same modified added removed)) {
2335
                  if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
2336
                      $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
2337
                  } else {
2338
                      $line .= "0${DELIM} ";
2339
                  }
2340
              }
2341
          }
2342
          foreach my $item (qw(blank comment)) {
2343
              foreach my $symbol (qw(same modified added removed)) {
2344
                  if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol} and
2345
                      defined $rhhh_count->{$lang_or_file}{'code'}{$symbol} and
2346
                      $rhhh_count->{$lang_or_file}{'code'}{$symbol} > 0) {
2347
                      $line .= sprintf("%.2f", $rhhh_count->{$lang_or_file}{$item}{$symbol} / $rhhh_count->{$lang_or_file}{'code'}{$symbol} * 100).${DELIM};
2348
                  } else {
2349
                      $line .= "0.00${DELIM} ";
2350
                  }
2351
              }
2352
          }
2353
          foreach my $item (qw(code)) {
2354
              foreach my $symbol (qw(same modified added removed)) {
2355
                  if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
2356
                      $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
2357
                  } else {
2358
                      $line .= "0${DELIM} ";
2359
                  }
2360
              }
2361
          }
2362
        } else {
2363
          foreach my $item (qw(nFiles blank comment code)) {
2364
              next if $BY_FILE and $item eq 'nFiles';
2365
              foreach my $symbol (qw(same modified added removed)) {
2366
                  if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
2367
                      $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
2368
                  } else {
2369
                      $line .= "0${DELIM} ";
2370
                  }
2371
              }
2372
          }
2373
        }
2374
        push @results, $line;
2375
    }
2376
 
2377
    print "<- diff_csv_report\n" if $opt_v > 2;
2378
    return @results;
2379
} # 1}}}
2380
sub rm_leading_tempdir {                     # {{{1
2381
    my ($in_file, $rh_temp_dirs, ) = @_;
2382
    my $clean_filename = $in_file;
2383
    foreach my $temp_d (keys %{$rh_temp_dirs}) {
2384
        if ($ON_WINDOWS) {
2385
        # \ -> / necessary to allow the next if test's
2386
        # m{} to work in the presence of spaces in file names
2387
            $temp_d         =~ s{\\}{/}g;
2388
            $clean_filename =~ s{\\}{/}g;
2389
        }
2390
        if ($clean_filename =~ m{^$temp_d/}) {
2391
            $clean_filename =~ s{^$temp_d/}{};
2392
            last;
2393
        }
2394
    }
2395
    $clean_filename =~ s{/}{\\}g if $ON_WINDOWS; # then go back from / to \
2396
    return $clean_filename;
2397
} # 1}}}
2398
sub generate_sql    {                        # {{{1
2399
    my ($elapsed_sec, # in
2400
        $rhh_count  , # in  count{TYPE}{lang|code|blank|comment|scaled}
2401
        $rh_scale   , # in
2402
       ) = @_;
2403
    print "-> generate_sql\n" if $opt_v > 2;
2404
 
2405
    $opt_sql_project = cwd() unless defined $opt_sql_project;
2406
    $opt_sql_project =~ s{/}{\\}g if $ON_WINDOWS;
2407
 
2408
    my $schema = undef;
2409
    if ($opt_sql_style eq "oracle") {
2410
        $schema = "
2411
CREATE TABLE metadata
2412
(
2413
  timestamp   TIMESTAMP,
2414
  project     VARCHAR2(500 CHAR),
2415
  elapsed_s   NUMBER(10, 6)
2416
) 
2417
/
2418
 
2419
CREATE TABLE t
2420
( 
2421
  project        VARCHAR2(500 CHAR),
2422
  language       VARCHAR2(500 CHAR),
2423
  file_fullname  VARCHAR2(500 CHAR),
2424
  file_dirname   VARCHAR2(500 CHAR),
2425
  file_basename  VARCHAR2(500 CHAR),
2426
  nblank         INTEGER,
2427
  ncomment       INTEGER,
2428
  ncode          INTEGER,
2429
  nscaled        NUMBER(10, 6)
2430
)
2431
/
2432
 
2433
";
2434
    } else {
2435
        $schema = "
2436
create table metadata (          -- $URL v $VERSION
2437
                timestamp varchar(500),    
2438
                Project   varchar(500),    
2439
                elapsed_s real);   
2440
create table t        (
2441
                Project       varchar(500)   ,  
2442
                Language      varchar(500)   ,  
2443
                File          varchar(500)   ,  
2444
                File_dirname  varchar(500)   ,  
2445
                File_basename varchar(500)   ,  
2446
                nBlank        integer        ,  
2447
                nComment      integer        ,  
2448
                nCode         integer        ,  
2449
                nScaled       real           ); 
2450
";
2451
    }
2452
    $opt_sql = "-" if $opt_sql eq "1";
2453
 
2454
    my $open_mode = ">";
2455
       $open_mode = ">>" if $opt_sql_append;
2456
 
2457
    my $fh = new IO::File; # $opt_sql, "w";
2458
    if (!$fh->open("${open_mode}${opt_sql}")) {
2459
        die "Unable to write to $opt_sql  $!\n";
2460
    }
2461
    print $fh $schema unless defined $opt_sql_append;
2462
 
2463
    if ($opt_sql_style eq "oracle") {
2464
        printf $fh "insert into metadata values(TO_TIMESTAMP('%s','yyyy-mm-dd hh24:mi:ss'), '%s', %f);\n",
2465
                    strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
2466
                    $opt_sql_project, $elapsed_sec;
2467
    } else {
2468
        print $fh "begin transaction;\n";
2469
        printf $fh "insert into metadata values('%s', '%s', %f);\n",
2470
                    strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
2471
                    $opt_sql_project, $elapsed_sec;
2472
    }
2473
 
2474
    my $nIns = 0;
2475
    foreach my $file (keys %{$rhh_count}) {
2476
        my $language = $rhh_count->{$file}{'lang'};
2477
        my $clean_filename = $file;
2478
        # If necessary (that is, if the input contained an
2479
        # archive file [.tar.gz, etc]), strip the temporary
2480
        # directory name which was used to expand the archive
2481
        # from the file name.
2482
 
2483
        $clean_filename = rm_leading_tempdir($clean_filename, \%TEMP_DIR);
2484
        $clean_filename =~ s/\'/''/g;  # double embedded single quotes
2485
                                       # to escape them 
2486
 
2487
        printf $fh "insert into t values('%s', '%s', '%s', '%s', '%s', " .
2488
                   "%d, %d, %d, %f);\n",
2489
                    $opt_sql_project           ,
2490
                    $language                  ,
2491
                    $clean_filename            ,
2492
                    dirname( $clean_filename)  ,
2493
                    basename($clean_filename)  ,
2494
                    $rhh_count->{$file}{'blank'},
2495
                    $rhh_count->{$file}{'comment'},
2496
                    $rhh_count->{$file}{'code'}   ,
2497
                    $rhh_count->{$file}{'code'}*$rh_scale->{$language};
2498
 
2499
        ++$nIns;
2500
        if (!($nIns % 10_000) and ($opt_sql_style ne "oracle")) {
2501
            print $fh "commit;\n";
2502
            print $fh "begin transaction;\n";
2503
        }
2504
    }
2505
    if ($opt_sql_style ne "oracle") {
2506
        print $fh "commit;\n";
2507
    }
2508
 
2509
    $fh->close unless $opt_sql eq "-"; # don't try to close STDOUT
2510
    print "<- generate_sql\n" if $opt_v > 2;
2511
 
2512
    # sample query:
2513
    #
2514
    #   select project, language, 
2515
    #          sum(nCode)     as Code, 
2516
    #          sum(nComment)  as Comments, 
2517
    #          sum(nBlank)    as Blank,  
2518
    #          sum(nCode)+sum(nComment)+sum(nBlank) as All_Lines,
2519
    #          100.0*sum(nComment)/(sum(nCode)+sum(nComment)) as Comment_Pct
2520
    #          from t group by Project, Language order by Project, Code desc;
2521
    #
2522
} # 1}}}
2523
sub output_header   {                        # {{{1
2524
    my ($header_line, 
2525
        $hyphen_line,
2526
        $BY_FILE    ,)    = @_;
2527
    print "-> output_header\n" if $opt_v > 2;
2528
    my @R = ();
2529
    if      ($opt_xml) {
2530
        if (!$ALREADY_SHOWED_XML_SECTION) {
2531
            push @R, "<?xml version=\"1.0\"?>";
2532
            push @R, '<?xml-stylesheet type="text/xsl" href="' .
2533
                            $opt_xsl . '"?>' if $opt_xsl;
2534
            push @R, "<results>";
2535
            push @R, "<header>$header_line</header>";
2536
            $ALREADY_SHOWED_XML_SECTION = 1;
2537
        }
2538
        if ($BY_FILE) {
2539
            push @R, "<files>";
2540
        } else {
2541
            push @R, "<languages>";
2542
        }
2543
    } elsif ($opt_yaml) {
2544
        push @R, "---\n# $header_line";
2545
    } elsif ($opt_csv) {
2546
        # append the header to the end of the column headers
2547
        # to keep the output a bit cleaner from a spreadsheet
2548
        # perspective
2549
    } else {
2550
        if ($ALREADY_SHOWED_HEADER) {
2551
            push @R, "";
2552
        } else {
2553
            push @R, $header_line;
2554
            $ALREADY_SHOWED_HEADER = 1;
2555
        }
2556
        push @R, $hyphen_line;
2557
    }
2558
    print "<- output_header\n" if $opt_v > 2;
2559
    return @R;
2560
} # 1}}}
2561
sub generate_report {                        # {{{1
2562
    # returns an array of lines containing the results
2563
    my ($version    , # in
2564
        $elapsed_sec, # in
2565
        $report_type, # in  "by language" | "by report file" | "by file"
2566
        $rhh_count  , # in  count{TYPE}{nFiles|code|blank|comment|scaled}
2567
        $rh_scale   , # in
2568
       ) = @_;
2569
 
2570
    print "-> generate_report\n" if $opt_v > 2;
2571
    my $DELIM = ",";
2572
       $DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
2573
 
2574
    my @results       = ();
2575
 
2576
    my $languages     = ();
2577
 
2578
    my $sum_files     = 0;
2579
    my $sum_code      = 0;
2580
    my $sum_blank     = 0;
2581
    my $sum_comment   = 0;
2582
    my $max_len       = 0;
2583
    foreach my $language (keys %{$rhh_count}) {
2584
        $sum_files   += $rhh_count->{$language}{'nFiles'} ;
2585
        $sum_blank   += $rhh_count->{$language}{'blank'}  ;
2586
        $sum_comment += $rhh_count->{$language}{'comment'};
2587
        $sum_code    += $rhh_count->{$language}{'code'}   ;
2588
        $max_len      = length($language) if length($language) > $max_len;
2589
    }
2590
    my $column_1_offset = 0;
2591
       $column_1_offset = $max_len - 17 if $max_len > 17;
2592
    my $sum_lines = $sum_blank + $sum_comment + $sum_code;
2593
    $elapsed_sec = 0.5 unless $elapsed_sec;
2594
 
2595
    my $spacing_0 = 23;
2596
    my $spacing_1 = 13;
2597
    my $spacing_2 =  9;
2598
    my $spacing_3 = 17;
2599
    if (!$opt_3) {
2600
        $spacing_1 = 19;
2601
        $spacing_2 = 14;
2602
        $spacing_3 = 28;
2603
    }
2604
    $spacing_0 += $column_1_offset;
2605
    $spacing_1 += $column_1_offset;
2606
    $spacing_3 += $column_1_offset;
2607
    my %Format = (
2608
        '1' => { 'xml' => 'name="%s" ',
2609
                 'txt' => "\%-${spacing_0}s ",
2610
               },
2611
        '2' => { 'xml' => 'name="%s" ',
2612
                 'txt' => "\%-${spacing_3}s ",
2613
               },
2614
        '3' => { 'xml' => 'files_count="%d" ',
2615
                 'txt' => '%5d ',
2616
               },
2617
        '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
2618
                 'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
2619
               },
2620
        '5' => { 'xml' => 'blank="%3.2f" comment="%3.2f" code="%d" ',
2621
                 'txt' => "\%14.2f \%14.2f \%${spacing_2}d",
2622
               },
2623
        '6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
2624
                 'txt' => ' x %6.2f = %14.2f',
2625
               },
2626
    );
2627
    my $Style = "txt";
2628
       $Style = "xml" if $opt_xml ;
2629
       $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
2630
       $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
2631
 
2632
    my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
2633
       $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset) 
2634
            if (!$opt_3) and (68 + $column_1_offset) > 79;
2635
    my $data_line  = "";
2636
    my $first_column;
2637
    my $BY_LANGUAGE = 0;
2638
    my $BY_FILE     = 0;
2639
    if      ($report_type eq "by language") {
2640
        $first_column = "Language";
2641
        $BY_LANGUAGE  = 1;
2642
    } elsif ($report_type eq "by file")     {
2643
        $first_column = "File";
2644
        $BY_FILE      = 1;
2645
    } elsif ($report_type eq "by report file")     {
2646
        $first_column = "File";
2647
    } else {
2648
        $first_column = "Report File";
2649
    }
2650
 
2651
    my $header_line  = sprintf "%s v %s", $URL, $version;
2652
       $header_line .= sprintf("  T=%.2f s (%.1f files/s, %.1f lines/s)",
2653
                        $elapsed_sec           ,
2654
                        $sum_files/$elapsed_sec,
2655
                        $sum_lines/$elapsed_sec) unless $opt_sum_reports;
2656
    if ($opt_xml or $opt_yaml) {
2657
        if (!$ALREADY_SHOWED_HEADER) {
2658
            push @results, xml_or_yaml_header($URL, $version, $elapsed_sec, 
2659
                                              $sum_files, $sum_lines, $BY_FILE);
2660
            $ALREADY_SHOWED_HEADER = 1 unless $opt_sum_reports;
2661
            # --sum-reports yields two xml or yaml files, one by
2662
            # language and one by report file, each of which needs a header
2663
        }
2664
        if ($opt_xml) {
2665
            if ($BY_FILE or ($report_type eq "by report file")) {
2666
                push @results, "<files>";
2667
            } else {
2668
                push @results, "<languages>";
2669
            }
2670
        }
2671
    } else {
2672
        push @results, output_header($header_line, $hyphen_line, $BY_FILE);
2673
    }
2674
 
2675
    if ($Style eq "txt") {
2676
        # column headers
2677
        if (!$opt_3 and $BY_FILE) {
2678
            my $spacing_n = $spacing_1 - 11;
2679
            $data_line  = sprintf "%-${spacing_n}s ", $first_column;
2680
        } else {
2681
            $data_line  = sprintf "%-${spacing_1}s ", $first_column;
2682
        }
2683
        if ($BY_FILE) {
2684
            $data_line .= sprintf "%${spacing_2}s "  , " "    ;
2685
        } else {
2686
            $data_line .= sprintf "%${spacing_2}s "  , "files";
2687
        }
2688
        my $PCT_symbol = "";
2689
           $PCT_symbol = " \%" if $opt_by_percent;
2690
        $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
2691
            "blank${PCT_symbol}"   ,
2692
            "comment${PCT_symbol}" ,
2693
            "code";
2694
        $data_line .= sprintf " %8s   %14s",
2695
            "scale"         ,
2696
            "3rd gen. equiv"
2697
              if $opt_3;
2698
        push @results, $data_line;
2699
        push @results, $hyphen_line;
2700
    }
2701
    if ($opt_csv) {
2702
        my $header2;
2703
        if ($BY_FILE) {
2704
            $header2 = "language${DELIM}filename";
2705
        } else {
2706
            $header2 = "files${DELIM}language";
2707
        }
2708
        $header2 .= "${DELIM}blank${DELIM}comment${DELIM}code";
2709
        $header2 .= "${DELIM}scale${DELIM}3rd gen. equiv" if $opt_3;
2710
        $header2 .= ${DELIM} . '"' . $header_line . '"';
2711
        push @results, $header2;
2712
    }
2713
 
2714
    my $sum_scaled = 0;
2715
####foreach my $lang_or_file (keys %{$rhh_count}) {
2716
####    $rhh_count->{$lang_or_file}{'code'} = 0 unless 
2717
####        defined $rhh_count->{$lang_or_file}{'code'};
2718
####}
2719
    foreach my $lang_or_file (sort {
2720
                                 $rhh_count->{$b}{'code'} <=>
2721
                                 $rhh_count->{$a}{'code'}
2722
                               }
2723
                          keys %{$rhh_count}) {
2724
        next if $lang_or_file eq "by report file";
2725
        my ($factor, $scaled);
2726
        if ($BY_LANGUAGE or $BY_FILE) {
2727
            $factor = 1;
2728
            if ($BY_LANGUAGE) {
2729
                if (defined $rh_scale->{$lang_or_file}) {
2730
                    $factor = $rh_scale->{$lang_or_file};
2731
                } else {
2732
                    warn "No scale factor for $lang_or_file; using 1.00";
2733
                }
2734
            } else { # by individual code file
2735
                if ($report_type ne "by report file") {
2736
                    next unless defined $rhh_count->{$lang_or_file}{'lang'};
2737
                    next unless defined $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
2738
                    $factor = $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
2739
                }
2740
            }
2741
            $scaled = $factor*$rhh_count->{$lang_or_file}{'code'};
2742
        } else {
2743
            if (!defined $rhh_count->{$lang_or_file}{'scaled'}) {
2744
                $opt_3 = 0;
2745
                # If we're summing together files previously generated
2746
                # with --no3 then rhh_count->{$lang_or_file}{'scaled'}
2747
                # this variable will be undefined.  That should only
2748
                # happen when summing together by file however.
2749
            } elsif ($BY_LANGUAGE) {
2750
                warn "Missing scaled language info for $lang_or_file\n";
2751
            }
2752
            if ($opt_3) {
2753
                $scaled =         $rhh_count->{$lang_or_file}{'scaled'};
2754
                $factor = $scaled/$rhh_count->{$lang_or_file}{'code'};
2755
            }
2756
        }
2757
 
2758
        if ($BY_FILE) {
2759
            my $clean_filename = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
2760
               $clean_filename = xml_metachars($clean_filename) if $opt_xml;
2761
            $data_line  = sprintf $Format{'1'}{$Style}, $clean_filename;
2762
        } else {
2763
            $data_line  = sprintf $Format{'2'}{$Style}, $lang_or_file;
2764
        }
2765
        $data_line .= sprintf $Format{3}{$Style}  , 
2766
                        $rhh_count->{$lang_or_file}{'nFiles'} unless $BY_FILE;
2767
        if ($opt_by_percent) {
2768
          my $DEN = compute_denominator($opt_by_percent       ,
2769
                        $rhh_count->{$lang_or_file}{'code'}   ,
2770
                        $rhh_count->{$lang_or_file}{'comment'},
2771
                        $rhh_count->{$lang_or_file}{'blank'}  );
2772
          $data_line .= sprintf $Format{5}{$Style}  , 
2773
              $rhh_count->{$lang_or_file}{'blank'}   / $DEN * 100,
2774
              $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100,
2775
              $rhh_count->{$lang_or_file}{'code'}   ;
2776
        } else {
2777
          $data_line .= sprintf $Format{4}{$Style}  , 
2778
              $rhh_count->{$lang_or_file}{'blank'}  ,
2779
              $rhh_count->{$lang_or_file}{'comment'},
2780
              $rhh_count->{$lang_or_file}{'code'}   ;
2781
        }
2782
        $data_line .= sprintf $Format{6}{$Style}  ,
2783
            $factor                               ,
2784
            $scaled if $opt_3;
2785
        $sum_scaled  += $scaled if $opt_3;
2786
 
2787
        if ($opt_xml) {
2788
            if (defined $rhh_count->{$lang_or_file}{'lang'}) {
2789
                my $lang = $rhh_count->{$lang_or_file}{'lang'};
2790
                if (!defined $languages->{$lang}) {
2791
                    $languages->{$lang} = $lang;
2792
                }
2793
                $data_line.=' language="' . $lang . '" ';
2794
            }
2795
            if ($BY_FILE or ($report_type eq "by report file")) {
2796
                push @results, "  <file " . $data_line . "/>";
2797
            } else {
2798
                push @results, "  <language " . $data_line . "/>";
2799
            }
2800
        } elsif ($opt_yaml) {
2801
            push @results,$lang_or_file . ":";
2802
            push @results,"  nFiles: "  .$rhh_count->{$lang_or_file}{'nFiles'} 
2803
                unless $BY_FILE;
2804
            if ($opt_by_percent) {
2805
              my $DEN = compute_denominator($opt_by_percent       ,
2806
                            $rhh_count->{$lang_or_file}{'code'}   ,
2807
                            $rhh_count->{$lang_or_file}{'comment'},
2808
                            $rhh_count->{$lang_or_file}{'blank'}  );
2809
              push @results,"  blank: "   . sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'} / $DEN * 100);
2810
              push @results,"  comment: " . sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100);
2811
              push @results,"  code: "    . $rhh_count->{$lang_or_file}{'code'}   ;
2812
            } else {
2813
              push @results,"  blank: "   . $rhh_count->{$lang_or_file}{'blank'}  ;
2814
              push @results,"  comment: " . $rhh_count->{$lang_or_file}{'comment'};
2815
              push @results,"  code: "    . $rhh_count->{$lang_or_file}{'code'}   ;
2816
            }
2817
            push @results,"  language: ".$rhh_count->{$lang_or_file}{'lang'} 
2818
                if $BY_FILE;
2819
            if ($opt_3) {
2820
                push @results, "  scaled: " . $scaled;
2821
                push @results, "  factor: " . $factor;
2822
            }
2823
        } elsif ($opt_csv) {
2824
            my $extra_3 = "";
2825
               $extra_3 = "${DELIM}$factor${DELIM}$scaled" if $opt_3;
2826
            my $first_column = undef;
2827
            my $clean_name   = $lang_or_file;
2828
            if ($BY_FILE) { 
2829
                $first_column = $rhh_count->{$lang_or_file}{'lang'};
2830
                $clean_name   = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
2831
            } else {
2832
                $first_column = $rhh_count->{$lang_or_file}{'nFiles'};
2833
            }
2834
            my $str = $first_column                         . ${DELIM} .
2835
                      $clean_name                           . ${DELIM};
2836
            if ($opt_by_percent) { 
2837
              my $DEN = compute_denominator($opt_by_percent               ,
2838
                            $rhh_count->{$lang_or_file}{'code'}{'code'}   ,
2839
                            $rhh_count->{$lang_or_file}{'code'}{'comment'},
2840
                            $rhh_count->{$lang_or_file}{'code'}{'blank'}  );
2841
              $str .= sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'}   / $DEN * 100) . ${DELIM} .
2842
                      sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . ${DELIM} .
2843
                      $rhh_count->{$lang_or_file}{'code'};
2844
            } else {
2845
              $str .= $rhh_count->{$lang_or_file}{'blank'}  . ${DELIM} .
2846
                      $rhh_count->{$lang_or_file}{'comment'}. ${DELIM} .
2847
                      $rhh_count->{$lang_or_file}{'code'};
2848
            }
2849
            $str .= $extra_3;
2850
            push @results, $str;
2851
        } else {
2852
            push @results, $data_line;
2853
        }
2854
    }
2855
 
2856
    my $avg_scale = 1;  # weighted average of scale factors
2857
       $avg_scale = sprintf("%.2f", $sum_scaled / $sum_code) 
2858
            if $sum_code and $opt_3;
2859
 
2860
    if ($opt_xml) {
2861
        $data_line = "";
2862
        if (!$BY_FILE) {
2863
            $data_line .= sprintf "sum_files=\"%d\" ", $sum_files;
2864
        }
2865
        if ($opt_by_percent) {
2866
          my $DEN = compute_denominator($opt_by_percent    ,
2867
                        $sum_code, $sum_comment, $sum_blank);
2868
          $data_line .= sprintf $Format{'5'}{$Style},
2869
              $sum_blank   / $DEN * 100,
2870
              $sum_comment / $DEN * 100,
2871
              $sum_code    ;
2872
        } else {
2873
          $data_line .= sprintf $Format{'4'}{$Style},
2874
              $sum_blank   ,
2875
              $sum_comment ,
2876
              $sum_code    ;
2877
        }
2878
        $data_line .= sprintf $Format{'6'}{$Style},
2879
            $avg_scale   ,
2880
            $sum_scaled  if $opt_3;
2881
        push @results, "  <total " . $data_line . "/>";
2882
 
2883
        if ($BY_FILE or ($report_type eq "by report file")) {
2884
            push @results, "</files>";
2885
        } else {
2886
            foreach my $language (keys %{$languages}) {
2887
                push @results, '  <language name="' . $language . '"/>';
2888
            }
2889
            push @results, "</languages>";
2890
        }
2891
 
2892
        if (!$opt_by_file_by_lang or $ALREADY_SHOWED_XML_SECTION) {
2893
            push @results, "</results>";
2894
        } else {
2895
            $ALREADY_SHOWED_XML_SECTION = 1;
2896
        }
2897
    } elsif ($opt_yaml) {
2898
        push @results, "SUM:";
2899
        if ($opt_by_percent) {
2900
          my $DEN = compute_denominator($opt_by_percent    ,
2901
                        $sum_code, $sum_comment, $sum_blank);
2902
          push @results, "  blank: "  . sprintf("%.2f", $sum_blank   / $DEN * 100);
2903
          push @results, "  comment: ". sprintf("%.2f", $sum_comment / $DEN * 100);
2904
          push @results, "  code: "   . $sum_code   ;
2905
        } else {
2906
          push @results, "  blank: "  . $sum_blank  ;
2907
          push @results, "  comment: ". $sum_comment;
2908
          push @results, "  code: "   . $sum_code   ;
2909
        }
2910
        push @results, "  nFiles: " . $sum_files  ;
2911
        if ($opt_3) {
2912
            push @results, "  scaled: " . $sum_scaled;
2913
            push @results, "  factor: " . $avg_scale ;
2914
        }
2915
    } elsif ($opt_csv) {
2916
        # do nothing
2917
    } else {
2918
 
2919
        if ($BY_FILE) {
2920
            $data_line  = sprintf "%-${spacing_0}s ", "SUM:"  ;
2921
        } else {
2922
            $data_line  = sprintf "%-${spacing_1}s ", "SUM:"  ;
2923
            $data_line .= sprintf "%${spacing_2}d ", $sum_files;
2924
        }
2925
        if ($opt_by_percent) {
2926
          my $DEN = compute_denominator($opt_by_percent    ,
2927
                        $sum_code, $sum_comment, $sum_blank);
2928
          $data_line .= sprintf $Format{'5'}{$Style},
2929
              $sum_blank   / $DEN * 100,
2930
              $sum_comment / $DEN * 100,
2931
              $sum_code    ;
2932
        } else {
2933
          $data_line .= sprintf $Format{'4'}{$Style},
2934
              $sum_blank   ,
2935
              $sum_comment ,
2936
              $sum_code    ;
2937
        }
2938
        $data_line .= sprintf $Format{'6'}{$Style},
2939
            $avg_scale   ,
2940
            $sum_scaled if $opt_3;
2941
        push @results, $hyphen_line if $sum_files > 1 or $opt_sum_one;
2942
        push @results, $data_line   if $sum_files > 1 or $opt_sum_one;
2943
        push @results, $hyphen_line;
2944
    }
2945
    write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
2946
    print "<- generate_report\n" if $opt_v > 2;
2947
    return @results;
2948
} # 1}}}
2949
sub print_errors {                           # {{{1
2950
    my ($rh_Error_Codes, # in
2951
        $raa_errors    , # in
2952
       ) = @_;
2953
 
2954
    print "-> print_errors\n" if $opt_v > 2;
2955
    my %error_string = reverse(%{$rh_Error_Codes});
2956
    my $nErrors      = scalar @{$raa_errors};
2957
    warn sprintf "\n%d error%s:\n", plural_form(scalar @Errors);
2958
    for (my $i = 0; $i < $nErrors; $i++) {
2959
        warn sprintf "%s:  %s\n", 
2960
                     $error_string{ $raa_errors->[$i][0] },
2961
                     $raa_errors->[$i][1] ;
2962
    }
2963
    print "<- print_errors\n" if $opt_v > 2;
2964
 
2965
} # 1}}}
2966
sub write_lang_def {                         # {{{1
2967
    my ($file                     ,
2968
        $rh_Language_by_Extension , # in
2969
        $rh_Language_by_Script    , # in
2970
        $rh_Language_by_File      , # in
2971
        $rhaa_Filters_by_Language , # in
2972
        $rh_Not_Code_Extension    , # in
2973
        $rh_Not_Code_Filename     , # in
2974
        $rh_Scale_Factor          , # in
2975
        $rh_EOL_Continuation_re   , # in
2976
       ) = @_;
2977
 
2978
    print "-> write_lang_def($file)\n" if $opt_v > 2;
2979
    my $OUT = new IO::File $file, "w";
2980
    die "Unable to write to $file\n" unless defined $OUT;
2981
 
2982
    foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
2983
        next if $language eq "MATLAB/Objective C/MUMPS/Mercury" or
2984
                $language eq "PHP/Pascal"               or
2985
                $language eq "Pascal/Puppet"            or
2986
                $language eq "Lisp/OpenCL"              or
2987
                $language eq "Lisp/Julia"               or
2988
                $language eq "Perl/Prolog"              or
2989
                $language eq "D/dtrace"                 or
2990
                $language eq "IDL/Qt Project/Prolog"    or
2991
                $language eq "(unknown)";
2992
        printf $OUT "%s\n", $language;
2993
        foreach my $filter (@{$rhaa_Filters_by_Language->{$language}}) {
2994
            printf $OUT "    filter %s", $filter->[0];
2995
            printf $OUT " %s", $filter->[1] if defined $filter->[1];
2996
            # $filter->[0] == 'remove_between_general',
2997
            #                 'remove_between_regex', and
2998
            #                 'remove_matches_2re' have two args
2999
            printf $OUT " %s", $filter->[2] if defined $filter->[2];
3000
            print  $OUT "\n";
3001
        }
3002
        foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
3003
            if ($language eq $rh_Language_by_Extension->{$ext}) {
3004
                printf $OUT "    extension %s\n", $ext;
3005
            }
3006
        }
3007
        foreach my $filename (sort keys %{$rh_Language_by_File}) {
3008
            if ($language eq $rh_Language_by_File->{$filename}) {
3009
                printf $OUT "    filename %s\n", $filename;
3010
            }
3011
        }
3012
        foreach my $script_exe (sort keys %{$rh_Language_by_Script}) {
3013
            if ($language eq $rh_Language_by_Script->{$script_exe}) {
3014
                printf $OUT "    script_exe %s\n", $script_exe;
3015
            }
3016
        }
3017
        printf $OUT "    3rd_gen_scale %.2f\n", $rh_Scale_Factor->{$language};
3018
        if (defined $rh_EOL_Continuation_re->{$language}) {
3019
            printf $OUT "    end_of_line_continuation %s\n", 
3020
                $rh_EOL_Continuation_re->{$language};
3021
        }
3022
    }
3023
 
3024
    $OUT->close;
3025
    print "<- write_lang_def\n" if $opt_v > 2;
3026
} # 1}}}
3027
sub read_lang_def {                          # {{{1
3028
    my ($file                     ,
3029
        $rh_Language_by_Extension , # out
3030
        $rh_Language_by_Script    , # out
3031
        $rh_Language_by_File      , # out
3032
        $rhaa_Filters_by_Language , # out
3033
        $rh_Not_Code_Extension    , # out
3034
        $rh_Not_Code_Filename     , # out
3035
        $rh_Scale_Factor          , # out
3036
        $rh_EOL_Continuation_re   , # out
3037
        $rh_EOL_abc,
3038
       ) = @_;
3039
 
3040
 
3041
    print "-> read_lang_def($file)\n" if $opt_v > 2;
3042
    my $IN = new IO::File $file, "r";
3043
    die "Unable to read $file.\n" unless defined $IN;
3044
 
3045
    my $language = "";
3046
    while (<$IN>) {
3047
        next if /^\s*#/ or /^\s*$/;
3048
 
3049
        if (/^(\w+.*?)\s*$/) {
3050
            $language = $1;
3051
            next;
3052
        }
3053
        die "Missing computer language name, line $. of $file\n"
3054
            unless $language;
3055
 
3056
        if      (/^\s{4}filter\s+(remove_between_(general|2re|regex))
3057
                       \s+(\S+)\s+(\S+)s*$/x) {
3058
            push @{$rhaa_Filters_by_Language->{$language}}, [ 
3059
                  $1 , $3 , $4 ]
3060
 
3061
        } elsif (/^\s{4}filter\s+(\w+)\s*$/) {
3062
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
3063
 
3064
        } elsif (/^\s{4}filter\s+(\w+)\s+(.*?)\s*$/) {
3065
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
3066
 
3067
        } elsif (/^\s{4}extension\s+(\S+)\s*$/) {
3068
            if (defined $rh_Language_by_Extension->{$1}) {
3069
                die "File extension collision:  $1 ",
3070
                    "maps to languages '$rh_Language_by_Extension->{$1}' ",
3071
                    "and '$language'\n" ,
3072
                    "Edit $file and remove $1 from one of these two ",
3073
                    "language definitions.\n";
3074
            }
3075
            $rh_Language_by_Extension->{$1} = $language;
3076
 
3077
        } elsif (/^\s{4}filename\s+(\S+)\s*$/) {
3078
            $rh_Language_by_File->{$1} = $language;
3079
 
3080
        } elsif (/^\s{4}script_exe\s+(\S+)\s*$/) {
3081
            $rh_Language_by_Script->{$1} = $language;
3082
 
3083
        } elsif (/^\s{4}3rd_gen_scale\s+(\S+)\s*$/) {
3084
            $rh_Scale_Factor->{$language} = $1;
3085
 
3086
        } elsif (/^\s{4}end_of_line_continuation\s+(\S+)\s*$/) {
3087
            $rh_EOL_Continuation_re->{$language} = $1;
3088
 
3089
        } else {
3090
            die "Unexpected data line $. of $file:\n$_\n";
3091
        }
3092
 
3093
    }
3094
    $IN->close;
3095
    print "<- read_lang_def\n" if $opt_v > 2;
3096
} # 1}}}
3097
sub merge_lang_def {                         # {{{1
3098
    my ($file                     ,
3099
        $rh_Language_by_Extension , # in/out
3100
        $rh_Language_by_Script    , # in/out
3101
        $rh_Language_by_File      , # in/out
3102
        $rhaa_Filters_by_Language , # in/out
3103
        $rh_Not_Code_Extension    , # in/out
3104
        $rh_Not_Code_Filename     , # in/out
3105
        $rh_Scale_Factor          , # in/out
3106
        $rh_EOL_Continuation_re   , # in/out
3107
        $rh_EOL_abc,
3108
       ) = @_;
3109
 
3110
 
3111
    print "-> merge_lang_def($file)\n" if $opt_v > 2;
3112
    my $IN = new IO::File $file, "r";
3113
    die "Unable to read $file.\n" unless defined $IN;
3114
 
3115
    my $language        = "";  
3116
    my $already_know_it = undef;
3117
    while (<$IN>) {
3118
        next if /^\s*#/ or /^\s*$/;
3119
 
3120
        if (/^(\w+.*?)\s*$/) {
3121
            $language = $1;
3122
            $already_know_it = defined $rh_Scale_Factor->{$language};
3123
            next;
3124
        }
3125
        die "Missing computer language name, line $. of $file\n"
3126
            unless $language;
3127
 
3128
        if      (/^    filter\s+(\w+)\s*$/) {
3129
            next if $already_know_it;
3130
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
3131
 
3132
        } elsif (/^    filter\s+(\w+)\s+(.*?)\s*$/) {
3133
            next if $already_know_it;
3134
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
3135
 
3136
        } elsif (/^    extension\s+(\S+)\s*$/) {
3137
            next if $already_know_it;
3138
            if (defined $rh_Language_by_Extension->{$1}) {
3139
                die "File extension collision:  $1 ",
3140
                    "maps to languages '$rh_Language_by_Extension->{$1}' ",
3141
                    "and '$language'\n" ,
3142
                    "Edit $file and remove $1 from one of these two ",
3143
                    "language definitions.\n";
3144
            }
3145
            $rh_Language_by_Extension->{$1} = $language;
3146
 
3147
        } elsif (/^    filename\s+(\S+)\s*$/) {
3148
            next if $already_know_it;
3149
            $rh_Language_by_File->{$1} = $language;
3150
 
3151
        } elsif (/^    script_exe\s+(\S+)\s*$/) {
3152
            next if $already_know_it;
3153
            $rh_Language_by_Script->{$1} = $language;
3154
 
3155
        } elsif (/^    3rd_gen_scale\s+(\S+)\s*$/) {
3156
            next if $already_know_it;
3157
            $rh_Scale_Factor->{$language} = $1;
3158
 
3159
        } elsif (/^    end_of_line_continuation\s+(\S+)\s*$/) {
3160
            next if $already_know_it;
3161
            $rh_EOL_Continuation_re->{$language} = $1;
3162
 
3163
        } else {
3164
            die "Unexpected data line $. of $file:\n$_\n";
3165
        }
3166
 
3167
    }
3168
    $IN->close;
3169
    print "<- merge_lang_def\n" if $opt_v > 2;
3170
} # 1}}}
3171
sub print_extension_info {                   # {{{1
3172
    my ($extension,) = @_;
3173
    if ($extension) {  # show information on this extension
3174
        foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
3175
            # Language_by_Extension{f}    = 'Fortran 77' 
3176
            printf "%-15s -> %s\n", $ext, $Language_by_Extension{$ext}
3177
                if $ext =~ m{$extension}i;
3178
        }
3179
    } else {           # show information on all  extensions
3180
        foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
3181
            # Language_by_Extension{f}    = 'Fortran 77' 
3182
            printf "%-15s -> %s\n", $ext, $Language_by_Extension{$ext};
3183
        }
3184
    }
3185
} # 1}}}
3186
sub print_language_info {                    # {{{1
3187
    my ($language,         
3188
        $prefix ,) = @_;
3189
    my %extensions = (); # the subset matched by the given $language value
3190
    if ($language) {  # show information on this language
3191
        foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
3192
            # Language_by_Extension{f}    = 'Fortran 77' 
3193
            push @{$extensions{$Language_by_Extension{$ext}} }, $ext
3194
                if lc $Language_by_Extension{$ext} eq lc $language;
3195
#               if $Language_by_Extension{$ext} =~ m{$language}i;
3196
        }
3197
    } else {          # show information on all  languages
3198
        foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
3199
            # Language_by_Extension{f}    = 'Fortran 77' 
3200
            push @{$extensions{$Language_by_Extension{$ext}} }, $ext
3201
        }
3202
    }
3203
 
3204
    # add exceptions (one file extension mapping to multiple languages)
3205
    if (!$language or 
3206
        $language =~ /^(Objective C|MATLAB|MUMPS|Mercury)$/i) {
3207
        push @{$extensions{'Objective C'}}, "m";
3208
        push @{$extensions{'MATLAB'}}     , "m";
3209
        push @{$extensions{'MUMPS'}}      , "m";
3210
        delete $extensions{'MATLAB/Objective C/MUMPS/Mercury'};
3211
    } elsif ($language =~ /^(Lisp|OpenCL)$/i) {
3212
        push @{$extensions{'Lisp'}}  , "cl";
3213
        push @{$extensions{'OpenCL'}}, "cl";
3214
        delete $extensions{'Lisp/OpenCL'};
3215
    } elsif ($language =~ /^(Lisp|Julia)$/i) {
3216
        push @{$extensions{'Lisp'}}  , "jl";
3217
        push @{$extensions{'Julia'}} , "jl";
3218
        delete $extensions{'Lisp/Julia'};
3219
    } elsif ($language =~ /^(Perl|Prolog)$/i) {
3220
        push @{$extensions{'Perl'}}  , "pl";
3221
        push @{$extensions{'Prolog'}}, "pl";
3222
        delete $extensions{'Perl/Prolog'};
3223
    } elsif ($language =~ /^(IDL|Qt Project|Prolog)$/i) {
3224
        push @{$extensions{'IDL'}}       , "pro";
3225
        push @{$extensions{'Qt Project'}}, "pro";
3226
        push @{$extensions{'Prolog'}}    , "pro";
3227
        delete $extensions{'IDL/Qt Project/Prolog'};
3228
    } elsif ($language =~ /^(D|dtrace)$/i) {
3229
        push @{$extensions{'D'}}       , "d";
3230
        push @{$extensions{'dtrace'}}  , "d";
3231
        delete $extensions{'D/dtrace'};
3232
    } elsif ($language =~ /^(Ant)$/i) {
3233
        push @{$extensions{'Ant'}}  , "build.xml";
3234
        delete $extensions{'Ant/XML'};
3235
    }
3236
 
3237
    if (%extensions) {
3238
        foreach my $lang (sort {lc $a cmp lc $b } keys %extensions) {
3239
            if ($prefix) {
3240
                printf "%s %s\n", $prefix, join(", ", @{$extensions{$lang}});
3241
            } else {
3242
                printf "%-26s (%s)\n", $lang, join(", ", @{$extensions{$lang}});
3243
            }
3244
        }
3245
    }
3246
} # 1}}}
3247
sub print_language_filters {                 # {{{1
3248
    my ($language,) = @_;
3249
    if (!@{$Filters_by_Language{$language}}) {
3250
        warn "Unknown language: $language\n";
3251
        warn "Use --show-lang to list all defined languages.\n";
3252
        return;
3253
    }
3254
    printf "%s\n", $language;
3255
    foreach my $filter (@{$Filters_by_Language{$language}}) {
3256
        printf "    filter %s", $filter->[0];
3257
        printf "  %s", $filter->[1] if defined $filter->[1];
3258
        printf "  %s", $filter->[2] if defined $filter->[2];
3259
        print  "\n";
3260
    }
3261
    print_language_info($language, "    extensions:");
3262
} # 1}}}
3263
sub make_file_list {                         # {{{1
3264
    my ($ra_arg_list,  # in   file and/or directory names to examine
3265
        $rh_Err     ,  # in   hash of error codes
3266
        $raa_errors ,  # out  errors encountered
3267
        $rh_ignored ,  # out  files not recognized as computer languages
3268
        ) = @_;
3269
    print "-> make_file_list(@{$ra_arg_list})\n" if $opt_v > 2;
3270
 
3271
    my ($fh, $filename);
3272
    if ($opt_categorized) {
3273
        $filename = $opt_categorized;
3274
        $fh = new IO::File $filename, "+>";  # open for read/write
3275
        die "Unable to write to $filename:  $!\n" unless defined $fh;
3276
    } elsif ($opt_sdir) {
3277
        # write to the user-defined scratch directory
3278
        $filename = $opt_sdir . '/cloc_file_list.txt';
3279
        $fh = new IO::File $filename, "+>";  # open for read/write
3280
        die "Unable to write to $filename:  $!\n" unless defined $fh;
3281
    } else {
3282
        # let File::Temp create a suitable temporary file
3283
        ($fh, $filename) = tempfile(UNLINK => 1);  # delete file on exit
3284
        print "Using temp file list [$filename]\n" if $opt_v;
3285
    }
3286
 
3287
    my @dir_list = ();
3288
    foreach my $file_or_dir (@{$ra_arg_list}) {
3289
#print "make_file_list file_or_dir=$file_or_dir\n";
3290
        my $size_in_bytes = 0;
3291
        if (!-r $file_or_dir) {
3292
            push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file_or_dir];
3293
            next;
3294
        }
3295
        if (is_file($file_or_dir)) {
3296
            if (!(-s $file_or_dir)) {   # 0 sized file, named pipe, socket 
3297
                $rh_ignored->{$file_or_dir} = 'zero sized file';
3298
                next;
3299
            } elsif (-B $file_or_dir and !$opt_read_binary_files) { 
3300
                # avoid binary files unless user insists on reading them
3301
                if ($opt_unicode) {
3302
                    # only ignore if not a Unicode file w/trivial 
3303
                    # ASCII transliteration
3304
                    if (!unicode_file($file_or_dir)) {
3305
                        $rh_ignored->{$file_or_dir} = 'binary file';
3306
                        next;
3307
                    }
3308
                } else {
3309
                    $rh_ignored->{$file_or_dir} = 'binary file';
3310
                    next;
3311
                }
3312
            }
3313
            push @file_list, "$file_or_dir";
3314
        } elsif (is_dir($file_or_dir)) {
3315
            push @dir_list, $file_or_dir;
3316
        } else {
3317
            push @{$raa_errors}, [$rh_Err->{'Neither file nor directory'} , $file_or_dir];
3318
            $rh_ignored->{$file_or_dir} = 'not file, not directory';
3319
        }
3320
    }
3321
    foreach my $dir (@dir_list) {
3322
#print "make_file_list dir=$dir\n";
3323
        # populates global variable @file_list
3324
        find({wanted     => \&files            ,
3325
              preprocess => \&find_preprocessor,
3326
              follow     =>  $opt_follow_links }, $dir);  
3327
    }
3328
    $nFiles_Found = scalar @file_list;
3329
    printf "%8d text file%s.\n", plural_form($nFiles_Found) unless $opt_quiet;
3330
    write_file($opt_found, sort @file_list) if $opt_found;
3331
 
3332
    my $nFiles_Categorized = 0;
3333
    foreach my $file (@file_list) {
3334
        printf "classifying $file\n" if $opt_v > 2;
3335
 
3336
        my $basename = basename $file;
3337
        if ($Not_Code_Filename{$basename}) {
3338
            $rh_ignored->{$file} = "listed in " . '$' .
3339
                "Not_Code_Filename{$basename}";
3340
            next;
3341
        } elsif ($basename =~ m{~$}) {
3342
            $rh_ignored->{$file} = "temporary editor file";
3343
            next;
3344
        }
3345
 
3346
        my $size_in_bytes = (stat $file)[7];
5792 dpurdie 3347
 
3348
        #   Vix extension
3349
        #   Perl under Windows can't handle pats longer that 255, so don't try
3350
        #   stat will return undef
3351
        #
3352
        if (! defined $size_in_bytes)
3353
        {
3354
            $rh_ignored->{$file} = "Windows path too long";
3355
            next;
3356
        }
3357
 
5282 dpurdie 3358
        my $language      = "";
3359
        if ($All_One_Language) {
3360
            # user over-rode auto-language detection by using
3361
            # --force-lang with just a language name (no extension)
3362
            $language      = $All_One_Language;
3363
        } else {
3364
            $language      = classify_file($file      ,
3365
                                           $rh_Err    ,
3366
                                           $raa_errors,
3367
                                           $rh_ignored);
3368
        }
5792 dpurdie 3369
 
5282 dpurdie 3370
die  "make_file_list($file) undef size" unless defined $size_in_bytes;
3371
die  "make_file_list($file) undef lang" unless defined $language;
3372
        printf $fh "%d,%s,%s\n", $size_in_bytes, $language, $file;
3373
        ++$nFiles_Categorized;
3374
        #printf "classified %d files\n", $nFiles_Categorized 
3375
        #    unless (!$opt_progress_rate or 
3376
        #            ($nFiles_Categorized % $opt_progress_rate));
3377
    }
3378
    printf "classified %d files\r", $nFiles_Categorized 
3379
        if !$opt_quiet and $nFiles_Categorized > 1;
3380
 
3381
    print "<- make_file_list()\n" if $opt_v > 2;
3382
 
3383
    return $fh;   # handle to the file containing the list of files to process
3384
}  # 1}}}
3385
sub remove_duplicate_files {                 # {{{1
3386
    my ($fh                   , # in 
3387
        $rh_Language          , # out
3388
        $rh_unique_source_file, # out
3389
        $rh_Err               , # in
3390
        $raa_errors           , # out  errors encountered
3391
        $rh_ignored           , # out
3392
        ) = @_;
3393
 
3394
    # Check for duplicate files by comparing file sizes.
3395
    # Where files are equally sized, compare their MD5 checksums.
3396
    print "-> remove_duplicate_files\n" if $opt_v > 2;
3397
 
3398
    my $n = 0;
3399
    my %files_by_size = (); # files_by_size{ # bytes } = [ list of files ]
3400
    seek($fh, 0, 0); # rewind to beginning of the temp file
3401
    while (<$fh>) {
3402
        ++$n;
3403
        my ($size_in_bytes, $language, $file) = split(/,/, $_, 3);
3404
        chomp($file);
3405
        $rh_Language->{$file} = $language;
3406
        push @{$files_by_size{$size_in_bytes}}, $file;
3407
        if ($opt_skip_uniqueness) {
3408
            $rh_unique_source_file->{$file} = 1;
3409
        }
3410
    }
3411
    return if $opt_skip_uniqueness;
3412
    if ($opt_progress_rate and ($n > $opt_progress_rate)) {
3413
        printf "Duplicate file check %d files (%d known unique)\r", 
3414
            $n, scalar keys %files_by_size;
3415
    }
3416
    $n = 0;
3417
    foreach my $bytes (sort {$a <=> $b} keys %files_by_size) {
3418
        ++$n;
3419
        printf "Unique: %8d files                                          \r",
3420
            $n unless (!$opt_progress_rate or ($n % $opt_progress_rate));
3421
        if (scalar @{$files_by_size{$bytes}} == 1) {
3422
            # only one file is this big; must be unique
3423
            $rh_unique_source_file->{$files_by_size{$bytes}[0]} = 1;
3424
            next;
3425
        } else {
3426
#print "equally sized files: ",join(", ", @{$files_by_size{$bytes}}), "\n";
3427
            # Files in the list @{$files_by_size{$bytes} all are
3428
            # $bytes long.  Sort the list by file basename.
3429
 
3430
          # # sorting on basename causes repeatability problems
3431
          # # if the basename is not unique (eg "includeA/x.h"
3432
          # # and "includeB/x.h".  Instead, sort on full path.
3433
          # # Ref bug #114.
3434
          # my @sorted_bn = ();
3435
          # my %BN = map { basename($_) => $_ } @{$files_by_size{$bytes}};
3436
          # foreach my $F (sort keys %BN) {
3437
          #     push @sorted_bn, $BN{$F};
3438
          # }
3439
 
3440
            my @sorted_bn = sort @{$files_by_size{$bytes}};
3441
 
3442
            foreach my $F (different_files(\@sorted_bn  ,
3443
                                            $rh_Err     ,
3444
                                            $raa_errors ,
3445
                                            $rh_ignored ) ) {
3446
                $rh_unique_source_file->{$F} = 1;
3447
            }
3448
        }
3449
    }
3450
    print "<- remove_duplicate_files\n" if $opt_v > 2;
3451
} # 1}}}
3452
sub find_preprocessor {                      # {{{1
3453
    # invoked by File::Find's find()   
3454
    # Reads global variable %Exclude_Dir.
3455
    # Populates global variable %Ignored.
3456
    # Reject files/directories in cwd which are in the exclude list.
3457
 
3458
    my @ok = ();
3459
    foreach my $F_or_D (@_) {  # pure file or directory name, no separators
3460
        if ($Exclude_Dir{$F_or_D}) {
3461
            $Ignored{$File::Find::name} = "--exclude-dir=$Exclude_Dir{$F_or_D}";
3462
        } elsif (-d $F_or_D) {
3463
            if ($opt_not_match_d and $F_or_D =~ m{$opt_not_match_d}) {
3464
                $Ignored{$File::Find::name} = "--not-match-d=$opt_not_match_d";
3465
            } else {
3466
                push @ok, $F_or_D;
3467
            }
3468
 
3469
        } else {
3470
            push @ok, $F_or_D;
3471
        }
3472
    }   
3473
    return @ok;
3474
} # 1}}}
3475
sub files {                                  # {{{1
3476
    # invoked by File::Find's find()   Populates global variable @file_list.
3477
    # See also find_preprocessor() which prunes undesired directories.
3478
 
3479
    my $Dir = cwd(); # not $File::Find::dir which just gives relative path
3480
    if ($opt_match_f    ) { return unless /$opt_match_f/;     }
3481
    if ($opt_not_match_f) { return if     /$opt_not_match_f/; }
3482
    if ($opt_match_d    ) { return unless $Dir =~ m{$opt_match_d}     }
3483
 
3484
    my $nBytes = -s $_ ;
3485
    if (!$nBytes) {
3486
        $Ignored{$File::Find::name} = 'zero sized file';
3487
        printf "files(%s)  zero size\n", $File::Find::name if $opt_v > 5;
3488
    }
3489
    return unless $nBytes  ; # attempting other tests w/pipe or socket will hang
3490
    if ($nBytes > $opt_max_file_size*1024**2) {
3491
        $Ignored{$File::Find::name} = "file size of " .
3492
            $nBytes/1024**2 . " MB exceeds max file size of " .
3493
            "$opt_max_file_size MB";
3494
        printf "file(%s)  exceeds $opt_max_file_size MB\n", 
3495
            $File::Find::name if $opt_v > 5;
3496
        return;
3497
    }
3498
    my $is_dir = is_dir($_);
3499
    my $is_bin = -B     $_ ;
3500
    printf "files(%s)  size=%d is_dir=%d  -B=%d\n",
3501
        $File::Find::name, $nBytes, $is_dir, $is_bin if $opt_v > 5;
3502
    $is_bin = 0 if $opt_unicode and unicode_file($_);
3503
    $is_bin = 0 if $opt_read_binary_files;
3504
    return if $is_dir or $is_bin;
3505
    ++$nFiles_Found;
3506
    printf "%8d files\r", $nFiles_Found 
3507
        unless (!$opt_progress_rate or ($nFiles_Found % $opt_progress_rate));
3508
    push @file_list, $File::Find::name;
3509
} # 1}}}
3510
sub archive_files {                          # {{{1
3511
    # invoked by File::Find's find()  Populates global variable @binary_archive
3512
    foreach my $ext (keys %Known_Binary_Archives) {
3513
        push @binary_archive, $File::Find::name 
3514
            if $File::Find::name =~ m{$ext$};
3515
    }
3516
} # 1}}}
3517
sub is_file {                                # {{{1
3518
    # portable method to test if item is a file
3519
    # (-f doesn't work in ActiveState Perl on Windows)
3520
    my $item = shift @_;
3521
 
3522
    if ($ON_WINDOWS) {
3523
        my $mode = (stat $item)[2];
3524
           $mode = 0 unless $mode;
3525
        if ($mode & 0100000) { return 1; } 
3526
        else                 { return 0; }
3527
    } else {
3528
        return (-f $item);  # works on Unix, Linux, CygWin, z/OS
3529
    }
3530
} # 1}}}
3531
sub is_dir {                                 # {{{1
3532
    # portable method to test if item is a directory
3533
    # (-d doesn't work in ActiveState Perl on Windows)
3534
    my $item = shift @_;
3535
 
3536
    if ($ON_WINDOWS) {
3537
        my $mode = (stat $item)[2];
3538
           $mode = 0 unless $mode;
3539
        if ($mode & 0040000) { return 1; } 
3540
        else                 { return 0; }
3541
    } else {
3542
        return (-d $item);  # works on Unix, Linux, CygWin, z/OS
3543
    }
3544
} # 1}}}
3545
sub is_excluded {                            # {{{1
3546
    my ($file       , # in
3547
        $excluded   , # in   hash of excluded directories
3548
       ) = @_;
3549
    my($filename, $filepath, $suffix) = fileparse($file);
3550
    foreach my $path (sort keys %{$excluded}) {
3551
        return 1 if ($filepath =~ m{^$path/}i);
3552
    }
3553
} # 1}}}
3554
sub classify_file {                          # {{{1
3555
    my ($full_file   , # in
3556
        $rh_Err      , # in   hash of error codes
3557
        $raa_errors  , # out
3558
        $rh_ignored  , # out
3559
       ) = @_;
3560
 
3561
    print "-> classify_file($full_file)\n" if $opt_v > 2;
3562
    my $language = "(unknown)";
3563
 
3564
    if (basename($full_file) eq "-" && defined $opt_stdin_name) {
3565
       $full_file = $opt_stdin_name;
3566
    }
3567
 
3568
    my $look_at_first_line = 0;
3569
    my $file = basename $full_file; 
3570
    if ($opt_autoconf and $file =~ /\.in$/) {
3571
       $file =~ s/\.in$//;
3572
    }
3573
    return $language if $Not_Code_Filename{$file}; # (unknown)
3574
    return $language if $file =~ m{~$}; # a temp edit file (unknown)
3575
    if (defined $Language_by_File{$file}) {
3576
        if      ($Language_by_File{$file} eq "Ant/XML") {
3577
            return Ant_or_XML(  $full_file, $rh_Err, $raa_errors);
3578
        } elsif ($Language_by_File{$file} eq "Maven/XML") {
3579
            return Maven_or_XML($full_file, $rh_Err, $raa_errors);
3580
        } else {
3581
            return $Language_by_File{$file};
3582
        }
3583
    }
3584
 
3585
    if ($file =~ /\.([^\.]+)$/) { # has an extension
3586
      print "$full_file extension=[$1]\n" if $opt_v > 2;
3587
      my $extension = $1;
3588
         # Windows file names are case insensitive so map 
3589
         # all extensions to lowercase there.
3590
         $extension = lc $extension if $ON_WINDOWS;  
3591
      my @extension_list = ( $extension );
3592
      if ($file =~ /\.([^\.]+\.[^\.]+)$/) { # has a double extension
3593
          my $extension = $1;
3594
          $extension = lc $extension if $ON_WINDOWS;  
3595
          unshift @extension_list, $extension;  # examine double ext first
3596
      }
3597
      foreach my $extension (@extension_list) {
3598
        if ($Not_Code_Extension{$extension} and 
3599
           !$Forced_Extension{$extension}) {
3600
           # If .1 (for example) is an extension that would ordinarily be
3601
           # ignored but the user has insisted this be counted with the
3602
           # --force-lang option, then go ahead and count it.
3603
            $rh_ignored->{$full_file} = 
3604
                'listed in $Not_Code_Extension{' . $extension . '}';
3605
            return $language;
3606
        }
3607
        if (defined $Language_by_Extension{$extension}) {
3608
            if ($Language_by_Extension{$extension} eq
3609
                'MATLAB/Objective C/MUMPS/Mercury') {
3610
                my $lang_M_or_O = "";
3611
                matlab_or_objective_C($full_file , 
3612
                                      $rh_Err    ,
3613
                                      $raa_errors,
3614
                                     \$lang_M_or_O);
3615
                if ($lang_M_or_O) {
3616
                    return $lang_M_or_O;
3617
                } else { # an error happened in matlab_or_objective_C()
3618
                    $rh_ignored->{$full_file} = 
3619
                        'failure in matlab_or_objective_C()';
3620
                    return $language; # (unknown)
3621
                }
3622
            } elsif ($Language_by_Extension{$extension} eq 'PHP/Pascal') {
3623
                if (really_is_php($full_file)) {
3624
                    return 'PHP';
3625
                } elsif (really_is_incpascal($full_file)) {
3626
                    return 'Pascal';
3627
                } else {
3628
                    return $language; # (unknown)
3629
                }
3630
            } elsif ($Language_by_Extension{$extension} eq 'Pascal/Puppet') {
3631
                my $lang_Pasc_or_Pup = "";
3632
                pascal_or_puppet(     $full_file , 
3633
                                      $rh_Err    ,
3634
                                      $raa_errors,
3635
                                     \$lang_Pasc_or_Pup);
3636
                if ($lang_Pasc_or_Pup) {
3637
                    return $lang_Pasc_or_Pup;
3638
                } else { # an error happened in pascal_or_puppet()
3639
                    $rh_ignored->{$full_file} = 
3640
                        'failure in pascal_or_puppet()';
3641
                    return $language; # (unknown)
3642
                }
3643
            } elsif ($Language_by_Extension{$extension} eq 'Lisp/OpenCL') {
3644
                return Lisp_or_OpenCL($full_file, $rh_Err, $raa_errors);
3645
            } elsif ($Language_by_Extension{$extension} eq 'Lisp/Julia') {
3646
                return Lisp_or_Julia( $full_file, $rh_Err, $raa_errors);
3647
            } elsif ($Language_by_Extension{$extension} eq 'Perl/Prolog') {
3648
                return Perl_or_Prolog($full_file, $rh_Err, $raa_errors);
3649
            } elsif ($Language_by_Extension{$extension} eq 
3650
                     'IDL/Qt Project/Prolog') {
3651
                return IDL_or_QtProject($full_file, $rh_Err, $raa_errors);
3652
            } elsif ($Language_by_Extension{$extension} eq 'D/dtrace') {
3653
                # is it D or an init.d shell script?
3654
                my $a_script = really_is_D($full_file, $rh_Err, $raa_errors);
3655
                if ($a_script) {
3656
                    # could be dtrace, sh, bash or anything one would
3657
                    # write an init.d script in
3658
                    if (defined $Language_by_Script{$a_script}) {
3659
                        return $Language_by_Script{$a_script};
3660
                    } else {
3661
                        $rh_ignored->{$full_file} = 
3662
                            "Unrecognized script language, '$a_script'";
3663
                    }
3664
                } else {
3665
                    return 'D';
3666
                }
3667
            } elsif ($Language_by_Extension{$extension} eq 'Smarty') {
3668
                # Smarty extension .tpl is generic; make sure the
3669
                # file at least roughly resembles PHP.  Alternatively,
3670
                # if the user forces the issue, do the count.
3671
                my $force_smarty = 0;
3672
                foreach (@opt_force_lang) {
3673
                    if (lc($_) eq "smarty,tpl") {
3674
                        $force_smarty = 1; 
3675
                        last;
3676
                    }
3677
                }
3678
                if (really_is_php($full_file) or $force_smarty) {
3679
                    return 'Smarty';
3680
                } else {
3681
                    return $language; # (unknown)
3682
                }
3683
            } else {
3684
                return $Language_by_Extension{$extension};
3685
            }
3686
        } else { # has an unmapped file extension
3687
            $look_at_first_line = 1;
3688
        }
3689
      }
3690
    } elsif (defined $Language_by_File{lc $file}) {
3691
        return $Language_by_File{lc $file};
3692
    } elsif ($opt_lang_no_ext and 
3693
             defined $Filters_by_Language{$opt_lang_no_ext}) {
3694
        return $opt_lang_no_ext;
3695
    } else {  # no file extension
3696
        $look_at_first_line = 1;
3697
    }
3698
 
3699
    if ($look_at_first_line) {
3700
        # maybe it is a shell/Perl/Python/Ruby/etc script that
3701
        # starts with pound bang:
3702
        #   #!/usr/bin/perl
3703
        #   #!/usr/bin/env perl
3704
        my $script_language = peek_at_first_line($full_file , 
3705
                                                 $rh_Err    , 
3706
                                                 $raa_errors);
3707
        if (!$script_language) {
3708
            $rh_ignored->{$full_file} = "language unknown (#2)";
3709
            # returns (unknown)
3710
        }
3711
        if (defined $Language_by_Script{$script_language}) {
3712
            if (defined $Filters_by_Language{
3713
                            $Language_by_Script{$script_language}}) {
3714
                $language = $Language_by_Script{$script_language};
3715
            } else {
3716
                $rh_ignored->{$full_file} = 
3717
                    "undefined:  Filters_by_Language{" . 
3718
                    $Language_by_Script{$script_language} .
3719
                    "} for scripting language $script_language";
3720
                # returns (unknown)
3721
            }
3722
        } else {
3723
            $rh_ignored->{$full_file} = "language unknown (#3)";
3724
            # returns (unknown)
3725
        }
3726
    }
3727
    print "<- classify_file($full_file)\n" if $opt_v > 2;
3728
    return $language;
3729
} # 1}}}
3730
sub peek_at_first_line {                     # {{{1
3731
    my ($file        , # in
3732
        $rh_Err      , # in   hash of error codes
3733
        $raa_errors  , # out
3734
       ) = @_;
3735
 
3736
    print "-> peek_at_first_line($file)\n" if $opt_v > 2;
3737
 
3738
    my $script_language = "";
3739
    if (!-r $file) {
3740
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
3741
        return $script_language;
3742
    }
3743
    my $IN = new IO::File $file, "r";
3744
    if (!defined $IN) {
3745
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
3746
        print "<- peek_at_first_line($file)\n" if $opt_v > 2;
3747
        return $script_language;
3748
    }
3749
    chomp(my $first_line = <$IN>);
3750
    if (defined $first_line) {
3751
#print "peek_at_first_line of [$file] first_line=[$first_line]\n";
3752
        if ($first_line =~ /^#\!\s*(\S.*?)$/) {
3753
#print "peek_at_first_line 1=[$1]\n";
3754
            my @pound_bang = split(' ', $1);
3755
#print "peek_at_first_line basename 0=[", basename($pound_bang[0]), "]\n";
3756
            if (basename($pound_bang[0]) eq "env" and 
3757
                scalar @pound_bang > 1) {
3758
                $script_language = $pound_bang[1];
3759
#print "peek_at_first_line pound_bang A $pound_bang[1]\n";
3760
            } else {
3761
                $script_language = basename $pound_bang[0];
3762
#print "peek_at_first_line pound_bang B $script_language\n";
3763
            }
3764
        }
3765
    }
3766
    $IN->close;
3767
    print "<- peek_at_first_line($file)\n" if $opt_v > 2;
3768
    return $script_language;
3769
} # 1}}}
3770
sub different_files {                        # {{{1
3771
    # See which of the given files are unique by computing each file's MD5
3772
    # sum.  Return the subset of files which are unique.
3773
    my ($ra_files    , # in
3774
        $rh_Err      , # in
3775
        $raa_errors  , # out
3776
        $rh_ignored  , # out
3777
       ) = @_;
3778
 
3779
    print "-> different_files(@{$ra_files})\n" if $opt_v > 2;
3780
    my %file_hash = ();  # file_hash{md5 hash} = [ file1, file2, ... ]
3781
    foreach my $F (@{$ra_files}) {
3782
        next if is_dir($F);  # needed for Windows
3783
        my $IN = new IO::File $F, "r";
3784
        if (!defined $IN) {
3785
            push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
3786
            $rh_ignored->{$F} = 'cannot read';
3787
        } else {
3788
            if ($HAVE_Digest_MD5) {
3789
                binmode $IN;
3790
                my $MD5 = Digest::MD5->new->addfile($IN)->hexdigest;
3791
#print "$F, $MD5\n";
3792
                push @{$file_hash{$MD5}}, $F;
3793
            } else {
3794
                # all files treated unique
3795
                push @{$file_hash{$F}}, $F;
3796
            }
3797
            $IN->close;
3798
        }
3799
    }
3800
 
3801
    # Loop over file sets having identical MD5 sums.  Within
3802
    # each set, pick the file that most resembles known source 
3803
    # code.
3804
    my @unique = ();
3805
    for my $md5 (sort keys %file_hash) {
3806
        my $i_best = 0;
3807
        for (my $i = 1; $i < scalar(@{$file_hash{$md5}}); $i++) {
3808
            my $F = $file_hash{$md5}[$i];
3809
            my (@nul_a, %nul_h);
3810
            my $language = classify_file($F, $rh_Err, 
3811
                                        # don't save these errors; pointless
3812
                                        \@nul_a, \%nul_h);
3813
            $i_best = $i if $language ne "(unknown)";
3814
        }
3815
        # keep the best one found and identify the rest as ignored
3816
        for (my $i = 0; $i < scalar(@{$file_hash{$md5}}); $i++) {
3817
            if ($i == $i_best) {
3818
                push @unique, $file_hash{$md5}[$i_best];
3819
            } else {
3820
                $rh_ignored->{$file_hash{$md5}[$i]} = "duplicate of " .
3821
                    $file_hash{$md5}[$i_best];
3822
            }
3823
        }
3824
 
3825
    }
3826
    print "<- different_files(@unique)\n" if $opt_v > 2;
3827
    return @unique;
3828
} # 1}}}
3829
sub call_counter {                           # {{{1
3830
    my ($file     , # in
3831
        $language , # in
3832
        $ra_Errors, # out
3833
       ) = @_;
3834
 
3835
    # Logic:  pass the file through the following filters:
3836
    #         1. remove blank lines
3837
    #         2. remove comments using each filter defined for this language
3838
    #            (example:  SQL has two, remove_starts_with(--) and 
3839
    #             remove_c_comments() )
3840
    #         3. compute comment lines as 
3841
    #               total lines - blank lines - lines left over after all
3842
    #                   comment filters have been applied
3843
 
3844
    print "-> call_counter($file, $language)\n" if $opt_v > 2;
3845
#print "call_counter:  ", Dumper(@routines), "\n";
3846
 
3847
    my @lines = ();
3848
    my $ascii = "";
3849
    if (-B $file and $opt_unicode) {
3850
        # was binary so must be unicode
3851
 
3852
        $/ = undef;
3853
        my $IN = new IO::File $file, "r";
3854
        my $bin_text = <$IN>;
3855
        $IN->close;
3856
        $/ = "\n";
3857
 
3858
        $ascii = unicode_to_ascii( $bin_text );
3859
        @lines = split("\n", $ascii );
3860
        foreach (@lines) { $_ = "$_\n"; }
3861
 
3862
    } else {
3863
        # regular text file
3864
        @lines = read_file($file);
3865
        $ascii = join('', @lines);
3866
    }
3867
 
3868
    my @original_lines = @lines;
3869
    my $total_lines    = scalar @lines;
3870
 
3871
    print_lines($file, "Original file:", \@lines) if $opt_print_filter_stages;
3872
    @lines = rm_blanks(\@lines, $language, \%EOL_Continuation_re); # remove blank lines
3873
    my $blank_lines = $total_lines - scalar @lines;
3874
    print_lines($file, "Blank lines removed:", \@lines) 
3875
        if $opt_print_filter_stages;
3876
 
3877
    @lines = rm_comments(\@lines, $language, $file,
3878
                               \%EOL_Continuation_re);
3879
 
3880
    my $comment_lines = $total_lines - $blank_lines - scalar  @lines;
3881
    if ($opt_strip_comments) {
3882
        my $stripped_file = "";
3883
        if ($opt_original_dir) {
3884
            $stripped_file =          $file . ".$opt_strip_comments";
3885
        } else {
3886
            $stripped_file = basename $file . ".$opt_strip_comments";
3887
        }
3888
        write_file($stripped_file, @lines);
3889
    }
3890
    if ($opt_html and !$opt_diff) {
3891
        chomp(@original_lines);  # includes blank lines, comments
3892
        chomp(@lines);           # no blank lines, no comments
3893
 
3894
        my (@diff_L, @diff_R, %count);
3895
 
3896
        # remove blank lines to get better quality diffs; count
3897
        # blank lines separately
3898
        my @original_lines_minus_white = ();
3899
        # however must keep track of how many blank lines were removed and
3900
        # where they were removed so that the HTML display can include it
3901
        my %blank_line  = ();
3902
        my $insert_line = 0;
3903
        foreach (@original_lines) {
3904
            if (/^\s*$/) {
3905
               ++$count{blank}{same};
3906
               ++$blank_line{ $insert_line };
3907
            } else {
3908
                ++$insert_line;
3909
                push @original_lines_minus_white, $_;
3910
            }
3911
        }
3912
 
3913
        array_diff( $file                       ,   # in
3914
                   \@original_lines_minus_white ,   # in
3915
                   \@lines                      ,   # in
3916
                   "comment"                    ,   # in
3917
                   \@diff_L, \@diff_R,          ,   # out
3918
                    $ra_Errors);                    # in/out
3919
        write_comments_to_html($file, \@diff_L, \@diff_R, \%blank_line);
3920
#print Dumper("count", \%count);
3921
    }
3922
 
3923
    print "<- call_counter($total_lines, $blank_lines, $comment_lines)\n" 
3924
        if $opt_v > 2;
3925
    return ($total_lines, $blank_lines, $comment_lines);
3926
} # 1}}}
3927
sub windows_glob {                           # {{{1
3928
    # Windows doesn't expand wildcards.  Use code from Sean M. Burke's 
3929
    # Win32::Autoglob module to do this.
3930
    return map {;
3931
        ( defined($_) and m/[\*\?]/ ) ? sort(glob($_)) : $_
3932
          } @_; 
3933
} # 1}}}
3934
sub write_file {                             # {{{1
3935
    my ($file  , # in
3936
        @lines , # in
3937
       ) = @_;
3938
 
3939
#print "write_file 1 [$file]\n";
3940
    # Do ~ expansion (by Tim LaBerge, fixes bug 2787984)
3941
    my $preglob_filename = $file;
3942
#print "write_file 2 [$preglob_filename]\n";
3943
    if ($ON_WINDOWS) {
3944
        $file = (windows_glob($file))[0];
3945
    } else {
3946
        $file = File::Glob::glob($file);
3947
    }
3948
#print "write_file 3 [$file]\n";
3949
    $file = $preglob_filename unless $file;
3950
#print "write_file 4 [$file]\n";
3951
 
3952
    print "-> write_file($file)\n" if $opt_v > 2;
3953
 
3954
    # Create the destination directory if it doesn't already exist.
3955
    my $abs_file_path = File::Spec->rel2abs( $file );
3956
    my ($volume, $directories, $filename) = File::Spec->splitpath( $abs_file_path );
3957
    mkpath($volume . $directories, 1, 0777);
3958
 
3959
    my $OUT = new IO::File $file, "w";
3960
    if (defined $OUT) {
3961
        chomp(@lines);
3962
        print $OUT join("\n", @lines), "\n";
3963
        $OUT->close;
3964
    } else {
3965
        warn "Unable to write to $file\n";
3966
    }
3967
    print "Wrote $file" unless $opt_quiet;
3968
    print ", $CLOC_XSL" if $opt_xsl and $opt_xsl eq $CLOC_XSL;
3969
    print "\n" unless $opt_quiet;
3970
 
3971
    print "<- write_file\n" if $opt_v > 2;
3972
} # 1}}}
3973
sub read_file  {                             # {{{1
3974
    my ($file, ) = @_;
3975
    my %BoM = (
3976
        "fe ff"           => 2 ,
3977
        "ff fe"           => 2 ,
3978
        "ef bb bf"        => 3 ,
3979
        "f7 64 4c"        => 3 ,
3980
        "0e fe ff"        => 3 ,
3981
        "fb ee 28"        => 3 ,
3982
        "00 00 fe ff"     => 4 ,
3983
        "ff fe 00 00"     => 4 ,
3984
        "2b 2f 76 38"     => 4 ,
3985
        "2b 2f 76 39"     => 4 ,
3986
        "2b 2f 76 2b"     => 4 ,
3987
        "2b 2f 76 2f"     => 4 ,
3988
        "dd 73 66 73"     => 4 ,
3989
        "84 31 95 33"     => 4 ,
3990
        "2b 2f 76 38 2d"  => 5 ,
3991
        );
3992
 
3993
    print "-> read_file($file)\n" if $opt_v > 2;
3994
    my @lines = ();
3995
    my $IN = new IO::File $file, "r";
3996
    if (defined $IN) {
3997
        @lines = <$IN>;
3998
        $IN->close;
3999
        if ($lines[$#lines]) {  # test necessary for zero content files
4000
                                # (superfluous?)
4001
            # Some files don't end with a new line.  Force this:
4002
            $lines[$#lines] .= "\n" unless $lines[$#lines] =~ m/\n$/;
4003
        }
4004
    } else {
4005
        warn "Unable to read $file\n";
4006
    }
4007
 
4008
    # Are first few characters of the file Unicode Byte Order
4009
    # Marks (http://en.wikipedia.org/wiki/Byte_Order_Mark)? 
4010
    # If yes, remove them.
4011
    if (@lines) {
4012
        my @chrs   = split('', $lines[0]);
4013
        my $n_chrs = scalar @chrs;
4014
        my ($n2, $n3, $n4, $n5) = ('', '', '', '');
4015
        $n2 = sprintf("%x %x", map  ord, @chrs[0,1]) if $n_chrs >= 2;
4016
        $n3 = sprintf("%s %x", $n2, ord  $chrs[2])   if $n_chrs >= 3;
4017
        $n4 = sprintf("%s %x", $n3, ord  $chrs[3])   if $n_chrs >= 4;
4018
        $n5 = sprintf("%s %x", $n4, ord  $chrs[4])   if $n_chrs >= 5;
4019
        if      (defined $BoM{$n2}) { $lines[0] = substr $lines[0], 2;
4020
        } elsif (defined $BoM{$n3}) { $lines[0] = substr $lines[0], 3;
4021
        } elsif (defined $BoM{$n4}) { $lines[0] = substr $lines[0], 4;
4022
        } elsif (defined $BoM{$n5}) { $lines[0] = substr $lines[0], 5;
4023
        }
4024
    }
4025
 
4026
    # Trim DOS line endings.  This allows Windows files
4027
    # to be diff'ed with Unix files without line endings
4028
    # causing every line to differ.
4029
    foreach (@lines) { s/\cM$// }
4030
 
4031
    print "<- read_file\n" if $opt_v > 2;
4032
    return @lines;
4033
} # 1}}}
4034
sub rm_blanks {                              # {{{1
4035
    my ($ra_in    ,
4036
        $language ,
4037
        $rh_EOL_continuation_re) = @_;
4038
    print "-> rm_blanks(language=$language)\n" if $opt_v > 2;
4039
#print "rm_blanks: language = [$language]\n";
4040
    my @out = ();
4041
    if ($language eq "COBOL") {
4042
        @out = remove_cobol_blanks($ra_in);
4043
    } else {
4044
        # removes blank lines
4045
        if (defined $rh_EOL_continuation_re->{$language}) {
4046
            @out = remove_matches_2re($ra_in, '^\s*$', 
4047
                                      $rh_EOL_continuation_re->{$language}); 
4048
        } else {
4049
            @out = remove_matches($ra_in, '^\s*$');
4050
        }
4051
    }
4052
 
4053
    print "<- rm_blanks(language=$language)\n" if $opt_v > 2;
4054
    return @out;
4055
} # 1}}}
4056
sub rm_comments {                            # {{{1
4057
    my ($ra_lines , # in, must be free of blank lines
4058
        $language , # in
4059
        $file     , # in (some language counters, eg Haskell, need 
4060
                    #     access to the original file)
4061
        $rh_EOL_continuation_re , # in
4062
       ) = @_;
4063
    print "-> rm_comments(file=$file)\n" if $opt_v > 2;
4064
    my @routines       = @{$Filters_by_Language{$language}};
4065
    my @lines          = @{$ra_lines};
4066
    my @original_lines = @{$ra_lines};
4067
 
4068
    if (!scalar @original_lines) {
4069
        return @lines;
4070
    }
4071
 
4072
    foreach my $call_string (@routines) {
4073
        my $subroutine = $call_string->[0];
4074
        if (! defined &{$subroutine}) {
4075
            warn "rm_comments undefined subroutine $subroutine for $file\n";
4076
            next;
4077
        }
4078
        print "rm_comments file=$file sub=$subroutine\n" if $opt_v > 1;
4079
        my @args  = @{$call_string};
4080
        shift @args; # drop the subroutine name
4081
        if (@args and $args[0] eq '>filename<') {
4082
            shift   @args;
4083
            unshift @args, $file;
4084
        }
4085
 
4086
        no strict 'refs';
4087
        @lines = &{$subroutine}(\@lines, @args);   # apply filter...
4088
 
4089
        print_lines($file, "After $subroutine(@args)", \@lines) 
4090
            if $opt_print_filter_stages;
4091
        # then remove blank lines which are created by comment removal
4092
        if (defined $rh_EOL_continuation_re->{$language}) {
4093
            @lines = remove_matches_2re(\@lines, '^\s*$',
4094
                                        $rh_EOL_continuation_re->{$language});
4095
        } else {
4096
            @lines = remove_matches(\@lines, '^\s*$');
4097
        }
4098
 
4099
        print_lines($file, "post $subroutine(@args) blank cleanup:", \@lines) 
4100
            if $opt_print_filter_stages;
4101
    }
4102
    # Exception for scripting languages:  treat the first #! line as code.
4103
    # Will need to add it back in if it was removed earlier.
4104
    if (defined $Script_Language{$language} and 
4105
        $original_lines[0] =~ /^#!/ and
4106
        (scalar(@lines) == 0 or 
4107
         $lines[0] ne $original_lines[0])) {
4108
        unshift @lines, $original_lines[0];  # add the first line back
4109
    }
4110
 
4111
    foreach (@lines) { chomp }   # make sure no spurious newlines were added
4112
 
4113
    print "<- rm_comments\n" if $opt_v > 2;
4114
    return @lines;
4115
} # 1}}}
4116
sub remove_f77_comments {                    # {{{1
4117
    my ($ra_lines, ) = @_;
4118
    print "-> remove_f77_comments\n" if $opt_v > 2;
4119
 
4120
    my @save_lines = ();
4121
    foreach (@{$ra_lines}) {
4122
        next if m{^[*cC]};
4123
        next if m{^\s*!};
4124
        push @save_lines, $_;
4125
    }
4126
 
4127
    print "<- remove_f77_comments\n" if $opt_v > 2;
4128
    return @save_lines;
4129
} # 1}}}
4130
sub remove_f90_comments {                    # {{{1
4131
    # derived from SLOCCount
4132
    my ($ra_lines, ) = @_;
4133
    print "-> remove_f90_comments\n" if $opt_v > 2;
4134
 
4135
    my @save_lines = ();
4136
    foreach (@{$ra_lines}) {
4137
        # a comment is              m/^\s*!/
4138
        # an empty line is          m/^\s*$/
4139
        # a HPF statement is        m/^\s*!hpf\$/i
4140
        # an Open MP statement is   m/^\s*!omp\$/i
4141
        if (! m/^(\s*!|\s*$)/ || m/^\s*!(hpf|omp)\$/i) {
4142
            push @save_lines, $_;
4143
        }
4144
    }
4145
 
4146
    print "<- remove_f90_comments\n" if $opt_v > 2;
4147
    return @save_lines;
4148
} # 1}}}
4149
sub remove_matches {                         # {{{1
4150
    my ($ra_lines, # in
4151
        $pattern , # in   Perl regular expression (case insensitive)
4152
       ) = @_;
4153
    print "-> remove_matches(pattern=$pattern)\n" if $opt_v > 2;
4154
 
4155
    my @save_lines = ();
4156
    foreach (@{$ra_lines}) {
4157
#chomp; print "remove_matches [$pattern] [$_]\n";
4158
        next if m{$pattern}i;
4159
        push @save_lines, $_;
4160
    }
4161
 
4162
    print "<- remove_matches\n" if $opt_v > 2;
4163
#print "remove_matches returning\n   ", join("\n   ", @save_lines), "\n";
4164
    return @save_lines;
4165
} # 1}}}
4166
sub remove_matches_2re {                     # {{{1
4167
    my ($ra_lines, # in
4168
        $pattern1, # in Perl regex 1 (case insensitive) to match
4169
        $pattern2, # in Perl regex 2 (case insensitive) to not match prev line
4170
       ) = @_;
4171
    print "-> remove_matches_2re(pattern=$pattern1,$pattern2)\n" if $opt_v > 2;
4172
 
4173
    my @save_lines = ();
4174
    for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
4175
#       chomp($ra_lines->[$i]);
4176
#print "remove_matches_2re [$pattern1] [$pattern2] [$ra_lines->[$i]]\n";
4177
        if ($i) {
4178
#print "remove_matches_2re prev=[$ra_lines->[$i-1]] this=[$ra_lines->[$i]]\n";
4179
            next if ($ra_lines->[$i]   =~ m{$pattern1}i) and 
4180
                    ($ra_lines->[$i-1] !~ m{$pattern2}i);
4181
        } else {
4182
            # on first line
4183
            next if $ra_lines->[$i]   =~  m{$pattern1}i;
4184
        }
4185
        push @save_lines, $ra_lines->[$i];
4186
    }
4187
 
4188
    print "<- remove_matches_2re\n" if $opt_v > 2;
4189
#print "remove_matches_2re returning\n   ", join("\n   ", @save_lines), "\n";
4190
    return @save_lines;
4191
} # 1}}}
4192
sub remove_inline {                          # {{{1
4193
    my ($ra_lines, # in
4194
        $pattern , # in   Perl regular expression (case insensitive)
4195
       ) = @_;
4196
    print "-> remove_inline(pattern=$pattern)\n" if $opt_v > 2;
4197
 
4198
    my @save_lines = ();
4199
    unless ($opt_inline) {
4200
        return @{$ra_lines};
4201
    }
4202
    my $nLines_affected = 0;
4203
    foreach (@{$ra_lines}) {
4204
#chomp; print "remove_inline [$pattern] [$_]\n";
4205
        if (m{$pattern}i) {
4206
            ++$nLines_affected;
4207
            s{$pattern}{}i;
4208
        }
4209
        push @save_lines, $_;
4210
    }
4211
 
4212
    print "<- remove_inline\n" if $opt_v > 2;
4213
#print "remove_inline returning\n   ", join("\n   ", @save_lines), "\n";
4214
    return @save_lines;
4215
} # 1}}}
4216
sub remove_above {                           # {{{1
4217
    my ($ra_lines, $marker, ) = @_;
4218
    print "-> remove_above(marker=$marker)\n" if $opt_v > 2;
4219
 
4220
    # Make two passes through the code:
4221
    # 1. check if the marker exists
4222
    # 2. remove anything above the marker if it exists,
4223
    #    do nothing if the marker does not exist
4224
 
4225
    # Pass 1
4226
    my $found_marker = 0;
4227
    for (my $line_number  = 1;
4228
            $line_number <= scalar @{$ra_lines};
4229
            $line_number++) {
4230
        if ($ra_lines->[$line_number-1] =~ m{$marker}) {
4231
            $found_marker = $line_number;
4232
            last;
4233
        }
4234
    }
4235
 
4236
    # Pass 2 only if needed
4237
    my @save_lines = ();
4238
    if ($found_marker) {
4239
        my $n = 1;
4240
        foreach (@{$ra_lines}) {
4241
            push @save_lines, $_
4242
                if $n >= $found_marker;
4243
            ++$n;
4244
        }
4245
    } else { # marker wasn't found; save all lines
4246
        foreach (@{$ra_lines}) {
4247
            push @save_lines, $_;
4248
        }
4249
    }
4250
 
4251
    print "<- remove_above\n" if $opt_v > 2;
4252
    return @save_lines;
4253
} # 1}}}
4254
sub remove_below {                           # {{{1
4255
    my ($ra_lines, $marker, ) = @_;
4256
    print "-> remove_below(marker=$marker)\n" if $opt_v > 2;
4257
 
4258
    my @save_lines = ();
4259
    foreach (@{$ra_lines}) {
4260
        last if m{$marker};
4261
        push @save_lines, $_;
4262
    }
4263
 
4264
    print "<- remove_below\n" if $opt_v > 2;
4265
    return @save_lines;
4266
} # 1}}}
4267
sub remove_below_above {                     # {{{1
4268
    my ($ra_lines, $marker_below, $marker_above, ) = @_;
4269
    # delete lines delimited by start and end line markers such
4270
    # as Perl POD documentation
4271
    print "-> remove_below_above(markerB=$marker_below, A=$marker_above)\n" 
4272
        if $opt_v > 2;
4273
 
4274
    my @save_lines = ();
4275
    my $between    = 0;
4276
    foreach (@{$ra_lines}) {
4277
        if (!$between and m{$marker_below}) {
4278
            $between    = 1;
4279
            next;
4280
        }
4281
        if ($between and m{$marker_above}) {
4282
            $between    = 0;
4283
            next;
4284
        }
4285
        next if $between;
4286
        push @save_lines, $_;
4287
    }
4288
 
4289
    print "<- remove_below_above\n" if $opt_v > 2;
4290
    return @save_lines;
4291
} # 1}}}
4292
sub remove_between {                         # {{{1
4293
    my ($ra_lines, $marker, ) = @_;
4294
    # $marker must contain one of the balanced pairs understood
4295
    # by Regexp::Common::balanced, namely
4296
    # '{}'  '()'  '[]'  or  '<>'
4297
 
4298
    print "-> remove_between(marker=$marker)\n" if $opt_v > 2;
4299
    my %acceptable = ('{}'=>1,  '()'=>1,  '[]'=>1,  '<>'=>1, );
4300
    die "remove_between:  invalid delimiter '$marker'\n",
4301
        "the delimiter must be one of these four pairs:\n",
4302
        "{}  ()  []  <>\n" unless
4303
        $acceptable{$marker};
4304
 
4305
    Install_Regexp_Common() unless $HAVE_Rexexp_Common;
4306
 
4307
    my $all_lines = join("", @{$ra_lines});
4308
 
4309
    no strict 'vars';
4310
    # otherwise get:
4311
    #  Global symbol "%RE" requires explicit package name at cloc line xx.
4312
    if ($all_lines =~ m/$RE{balanced}{-parens => $marker}/) {
4313
        no warnings; 
4314
        $all_lines =~ s/$1//g;
4315
    }
4316
 
4317
    print "<- remove_between\n" if $opt_v > 2;
4318
    return split("\n", $all_lines);
4319
} # 1}}}
4320
sub remove_between_general {                 # {{{1
4321
    my ($ra_lines, $start_marker, $end_marker, ) = @_;
4322
    # Start and end markers may be any length strings.
4323
 
4324
    print "-> remove_between_general(start=$start_marker, end=$end_marker)\n"
4325
        if $opt_v > 2;
4326
 
4327
    my $all_lines = join("", @{$ra_lines});
4328
 
4329
    my @save_lines = ();
4330
    my $in_comment = 0;
4331
    foreach (@{$ra_lines}) {
4332
 
4333
        next if /^\s*$/;
4334
        s/\Q$start_marker\E.*?\Q$end_marker\E//g;  # strip one-line comments
4335
        next if /^\s*$/;
4336
        if ($in_comment) {
4337
            if (/\Q$end_marker\E/) {
4338
                s/^.*?\Q$end_marker\E//;
4339
                $in_comment = 0;
4340
            }
4341
            next if $in_comment;
4342
        }
4343
        next if /^\s*$/;
4344
        $in_comment = 1 if /^(.*?)\Q$start_marker\E/; # $1 may be blank or code
4345
        next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
4346
        if ($in_comment) {
4347
            # part code, part comment; strip the comment and keep the code
4348
            s/^(.*?)\Q$start_marker\E.*$/$1/;
4349
        }
4350
        push @save_lines, $_;
4351
    }
4352
 
4353
    print "<- remove_between_general\n" if $opt_v > 2;
4354
    return @save_lines;
4355
} # 1}}}
4356
sub remove_between_regex   {                 # {{{1
4357
    my ($ra_lines, $start_RE, $end_RE, ) = @_;
4358
    # Start and end regex's may be any length strings.
4359
 
4360
    print "-> remove_between_regex(start=$start_RE, end=$end_RE)\n"
4361
        if $opt_v > 2;
4362
 
4363
    my $all_lines = join("", @{$ra_lines});
4364
 
4365
    my @save_lines = ();
4366
    my $in_comment = 0;
4367
    foreach (@{$ra_lines}) {
4368
 
4369
        next if /^\s*$/;
4370
        s/${start_RE}.*?${end_RE}//g;  # strip one-line comments
4371
        next if /^\s*$/;
4372
        if ($in_comment) {
4373
            if (/$end_RE/) {
4374
                s/^.*?${end_RE}//;
4375
                $in_comment = 0;
4376
            }
4377
            next if $in_comment;
4378
        }   
4379
        next if /^\s*$/;
4380
        $in_comment = 1 if /^(.*?)${start_RE}/; # $1 may be blank or code
4381
        next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
4382
        if ($in_comment) {
4383
            # part code, part comment; strip the comment and keep the code
4384
            s/^(.*?)${start_RE}.*$/$1/;
4385
        }
4386
        push @save_lines, $_;
4387
    }
4388
 
4389
    print "<- remove_between_regex\n" if $opt_v > 2;
4390
    return @save_lines;
4391
} # 1}}}
4392
sub remove_cobol_blanks {                    # {{{1
4393
    # subroutines derived from SLOCCount
4394
    my ($ra_lines, ) = @_;
4395
 
4396
    my $free_format = 0;  # Support "free format" source code.
4397
    my @save_lines  = ();
4398
 
4399
    foreach (@{$ra_lines}) {
4400
        next if m/^\s*$/;
4401
        my $line = expand($_);  # convert tabs to equivalent spaces
4402
        $free_format = 1 if $line =~ m/^......\$.*SET.*SOURCEFORMAT.*FREE/i;
4403
        if ($free_format) {
4404
            push @save_lines, $_;
4405
        } else {
4406
            # Greg Toth:
4407
            #  (1) Treat lines with any alphanum in cols 1-6 and 
4408
            #      blanks in cols 7 through 71 as blank line, and
4409
            #  (2) Treat lines with any alphanum in cols 1-6 and 
4410
            #      slash (/) in col 7 as blank line (this is a 
4411
            #      page eject directive). 
4412
            push @save_lines, $_ unless m/^\d{6}\s*$/             or 
4413
                                        ($line =~ m/^.{6}\s{66}/) or 
4414
                                        ($line =~ m/^......\//);
4415
        }
4416
    }
4417
    return @save_lines;
4418
} # 1}}}
4419
sub remove_cobol_comments {                  # {{{1
4420
    # subroutines derived from SLOCCount
4421
    my ($ra_lines, ) = @_;
4422
 
4423
    my $free_format = 0;  # Support "free format" source code.
4424
    my @save_lines  = ();
4425
 
4426
    foreach (@{$ra_lines}) {
4427
        if (m/^......\$.*SET.*SOURCEFORMAT.*FREE/i) {$free_format = 1;}
4428
        if ($free_format) {
4429
            push @save_lines, $_ unless m{^\s*\*};
4430
        } else {
4431
            push @save_lines, $_ unless m{^......\*} or m{^\*};
4432
        }
4433
    }
4434
    return @save_lines;
4435
} # 1}}}
4436
sub remove_jcl_comments {                    # {{{1
4437
    my ($ra_lines, ) = @_;
4438
 
4439
    print "-> remove_jcl_comments\n" if $opt_v > 2;
4440
 
4441
    my @save_lines = ();
4442
    my $in_comment = 0;
4443
    foreach (@{$ra_lines}) {
4444
        next if /^\s*$/;
4445
        next if m{^\s*//\*};
4446
        last if m{^\s*//\s*$};
4447
        push @save_lines, $_;
4448
    }
4449
 
4450
    print "<- remove_jcl_comments\n" if $opt_v > 2;
4451
    return @save_lines;
4452
} # 1}}}
4453
sub remove_jsp_comments {                    # {{{1
4454
    #  JSP comment is   <%--  body of comment   --%>
4455
    my ($ra_lines, ) = @_;
4456
 
4457
    print "-> remove_jsp_comments\n" if $opt_v > 2;
4458
 
4459
    my @save_lines = ();
4460
    my $in_comment = 0;
4461
    foreach (@{$ra_lines}) {
4462
 
4463
        next if /^\s*$/;
4464
        s/<\%\-\-.*?\-\-\%>//g;  # strip one-line comments
4465
        next if /^\s*$/;
4466
        if ($in_comment) {
4467
            if (/\-\-\%>/) {
4468
                s/^.*?\-\-\%>//;
4469
                $in_comment = 0;
4470
            }
4471
        }
4472
        next if /^\s*$/;
4473
        $in_comment = 1 if /^(.*?)<\%\-\-/;
4474
        next if defined $1 and $1 =~ /^\s*$/;
4475
        next if ($in_comment);
4476
        push @save_lines, $_;
4477
    }
4478
 
4479
    print "<- remove_jsp_comments\n" if $opt_v > 2;
4480
    return @save_lines;
4481
} # 1}}}
4482
sub remove_html_comments {                   # {{{1
4483
    #  HTML comment is   <!--  body of comment   -->
4484
    #  Need to use my own routine until the HTML comment regex in
4485
    #  the Regexp::Common module can handle  <!--  --  -->
4486
    my ($ra_lines, ) = @_;
4487
 
4488
    print "-> remove_html_comments\n" if $opt_v > 2;
4489
 
4490
    my @save_lines = ();
4491
    my $in_comment = 0;
4492
    foreach (@{$ra_lines}) {
4493
 
4494
        next if /^\s*$/;
4495
        s/<!\-\-.*?\-\->//g;  # strip one-line comments
4496
        next if /^\s*$/;
4497
        if ($in_comment) {
4498
            if (/\-\->/) {
4499
                s/^.*?\-\->//;
4500
                $in_comment = 0;
4501
            }
4502
        }
4503
        next if /^\s*$/;
4504
        $in_comment = 1 if /^(.*?)<!\-\-/;
4505
        next if defined $1 and $1 =~ /^\s*$/;
4506
        next if ($in_comment);
4507
        push @save_lines, $_;
4508
    }
4509
 
4510
    print "<- remove_html_comments\n" if $opt_v > 2;
4511
    return @save_lines;
4512
} # 1}}}
4513
sub remove_haml_block {                      # {{{1
4514
    # Haml block comments are defined by a silent comment marker like
4515
    #    /
4516
    # or
4517
    #    -#
4518
    # followed by indented text on subsequent lines.
4519
    # http://haml.info/docs/yardoc/file.REFERENCE.html#comments
4520
    my ($ra_lines, ) = @_;
4521
 
4522
    print "-> remove_haml_block\n" if $opt_v > 2;
4523
 
4524
    my @save_lines = ();
4525
    my $in_comment = 0;
4526
    foreach (@{$ra_lines}) {
4527
 
4528
        next if /^\s*$/;
4529
        my $line = expand($_);  # convert tabs to equivalent spaces
4530
        if ($in_comment) {
4531
            $line =~ /^(\s*)/;
4532
            # print "indent=", length $1, "\n";
4533
            if (length $1 < $in_comment) {
4534
                # indent level is less than comment level
4535
                # are back in code
4536
                $in_comment = 0;
4537
            } else {
4538
                # still in comments, don't use this line
4539
                next;
4540
            }
4541
        } elsif ($line =~ m{^(\s*)(/|-#)\s*$}) {
4542
            if ($1) {
4543
                $in_comment = length $1 + 1; # number of leading spaces + 1
4544
            } else {
4545
                $in_comment = 1;
4546
            }
4547
            # print "in_comment=$in_comment\n";
4548
            next;
4549
        }
4550
        push @save_lines, $line;
4551
    }
4552
 
4553
    print "<- remove_haml_block\n" if $opt_v > 2;
4554
    return @save_lines;
4555
} # 1}}}
4556
sub add_newlines {                           # {{{1
4557
    my ($ra_lines, ) = @_;
4558
    print "-> add_newlines \n" if $opt_v > 2;
4559
 
4560
    my @save_lines = ();
4561
    foreach (@{$ra_lines}) {
4562
 
4563
        push @save_lines, "$_\n";
4564
    }
4565
 
4566
    print "<- add_newlines \n" if $opt_v > 2;
4567
    return @save_lines;
4568
} # 1}}}
4569
sub docstring_to_C {                         # {{{1
4570
    my ($ra_lines, ) = @_;
4571
    # Converts Python docstrings to C comments.
4572
 
4573
    print "-> docstring_to_C()\n" if $opt_v > 2;
4574
 
4575
    my $in_docstring = 0;
4576
    foreach (@{$ra_lines}) {
4577
        while (/"""/) {
4578
            if (!$in_docstring) {
4579
                s{[uU]?"""}{/*};
4580
                $in_docstring = 1;
4581
            } else {
4582
                s{"""}{*/};
4583
                $in_docstring = 0;
4584
            }
4585
        }
4586
    }
4587
 
4588
    print "<- docstring_to_C\n" if $opt_v > 2;
4589
    return @{$ra_lines};
4590
} # 1}}}
4591
sub powershell_to_C {                        # {{{1
4592
    my ($ra_lines, ) = @_;
4593
    # Converts PowerShell block comment markers to C comments.
4594
 
4595
    print "-> powershell_to_C()\n" if $opt_v > 2;
4596
 
4597
    my $in_docstring = 0;
4598
    foreach (@{$ra_lines}) {
4599
        s{<#}{/*}g;
4600
        s{#>}{*/}g;
4601
    }
4602
 
4603
    print "<- powershell_to_C\n" if $opt_v > 2;
4604
    return @{$ra_lines};
4605
} # 1}}}
4606
sub smarty_to_C {                            # {{{1
4607
    my ($ra_lines, ) = @_;
4608
    # Converts Smarty comments to C comments.
4609
 
4610
    print "-> smarty_to_C()\n" if $opt_v > 2;
4611
 
4612
    foreach (@{$ra_lines}) {
4613
        s[{\*][/*]g;
4614
        s[\*}][*/]g;
4615
    }
4616
 
4617
    print "<- smarty_to_C\n" if $opt_v > 2;
4618
    return @{$ra_lines};
4619
} # 1}}}
4620
sub determine_lit_type {                     # {{{1
4621
  my ($file) = @_;
4622
 
4623
  open (FILE, $file);
4624
  while (<FILE>) {
4625
    if (m/^\\begin\{code\}/) { close FILE; return 2; }
4626
    if (m/^>\s/) { close FILE; return 1; }
4627
  }
4628
 
4629
  return 0;
4630
} # 1}}}
4631
sub remove_haskell_comments {                # {{{1
4632
    # Bulk of code taken from SLOCCount's haskell_count script.
4633
    # Strips out {- .. -} and -- comments and counts the rest.
4634
    # Pragmas, {-#...}, are counted as SLOC.
4635
    # BUG: Doesn't handle strings with embedded block comment markers gracefully.
4636
    #      In practice, that shouldn't be a problem.
4637
    my ($ra_lines, $file, ) = @_;
4638
 
4639
    print "-> remove_haskell_comments\n" if $opt_v > 2;
4640
 
4641
    my @save_lines = ();
4642
    my $in_comment = 0;
4643
    my $incomment  = 0;
4644
    my ($literate, $inlitblock) = (0,0);
4645
 
4646
    $literate = 1 if $file =~ /\.lhs$/;
4647
    if($literate) { $literate = determine_lit_type($file) }
4648
 
4649
    foreach (@{$ra_lines}) {
4650
        if ($literate == 1) {
4651
            if (!s/^>//) { s/.*//; }
4652
        } elsif ($literate == 2) {
4653
            if ($inlitblock) {
4654
                if (m/^\\end\{code\}/) { s/.*//; $inlitblock = 0; }
4655
            } elsif (!$inlitblock) {
4656
                if (m/^\\begin\{code\}/) { s/.*//; $inlitblock = 1; }
4657
                else { s/.*//; }
4658
            }
4659
        }
4660
 
4661
        if ($incomment) {
4662
            if (m/\-\}/) { s/^.*?\-\}//;  $incomment = 0;}
4663
            else { s/.*//; }
4664
        }
4665
        if (!$incomment) {
4666
            s/--.*//;
4667
            s!{-[^#].*?-}!!g;
4668
            if (m/{-/ && (!m/{-#/)) {
4669
              s/{-.*//;
4670
              $incomment = 1;
4671
            }
4672
        }
4673
        if (m/\S/) { push @save_lines, $_; }
4674
    }
4675
#   if ($incomment) {print "ERROR: ended in comment in $ARGV\n";}
4676
 
4677
    print "<- remove_haskell_comments\n" if $opt_v > 2;
4678
    return @save_lines;
4679
} # 1}}}
4680
sub print_lines {                            # {{{1
4681
    my ($file     , # in
4682
        $title    , # in
4683
        $ra_lines , # in
4684
       ) = @_;
4685
    printf "->%-30s %s\n", $file, $title;
4686
    for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
4687
        printf "%5d | %s", $i+1, $ra_lines->[$i];
4688
        print "\n" unless $ra_lines->[$i] =~ m{\n$}
4689
    }
4690
} # 1}}}
4691
sub set_constants {                          # {{{1
4692
    my ($rh_Language_by_Extension , # out
4693
        $rh_Language_by_Script    , # out
4694
        $rh_Language_by_File      , # out
4695
        $rhaa_Filters_by_Language , # out
4696
        $rh_Not_Code_Extension    , # out
4697
        $rh_Not_Code_Filename     , # out
4698
        $rh_Scale_Factor          , # out
4699
        $rh_Known_Binary_Archives , # out
4700
        $rh_EOL_continuation_re   , # out
4701
       ) = @_;
4702
# 1}}}
4703
%{$rh_Language_by_Extension} = (             # {{{1
4704
            'abap'        => 'ABAP'                  ,
4705
            'ac'          => 'm4'                    ,
4706
            'ada'         => 'Ada'                   ,
4707
            'adb'         => 'Ada'                   ,
4708
            'ads'         => 'Ada'                   ,
4709
            'adso'        => 'ADSO/IDSM'             ,
4710
            'ahk'         => 'AutoHotkey'            ,
4711
            'am'          => 'make'                  ,
4712
            'ample'       => 'AMPLE'                 ,
4713
            'as'          => 'ActionScript'          ,
4714
            'dofile'      => 'AMPLE'                 ,
4715
            'startup'     => 'AMPLE'                 ,
4716
            'asa'         => 'ASP'                   ,
4717
            'asax'        => 'ASP.Net'               ,
4718
            'ascx'        => 'ASP.Net'               ,
4719
            'asm'         => 'Assembly'              ,
4720
            'asmx'        => 'ASP.Net'               ,
4721
            'asp'         => 'ASP'                   ,
4722
            'aspx'        => 'ASP.Net'               ,
4723
            'master'      => 'ASP.Net'               ,
4724
            'sitemap'     => 'ASP.Net'               ,
4725
            'cshtml'      => 'Razor'                 ,
4726
            'awk'         => 'awk'                   ,
4727
            'bash'        => 'Bourne Again Shell'    ,
4728
            'bas'         => 'Visual Basic'          ,
4729
            'dxl'         => 'DOORS Extension Language',
4730
            'bat'         => 'DOS Batch'             ,
4731
            'BAT'         => 'DOS Batch'             ,
4732
            'cmd'         => 'DOS Batch'             ,
4733
            'CMD'         => 'DOS Batch'             ,
4734
            'btm'         => 'DOS Batch'             ,
4735
            'BTM'         => 'DOS Batch'             ,
4736
            'build.xml'   => 'Ant'                   ,
4737
            'cbl'         => 'COBOL'                 ,
4738
            'CBL'         => 'COBOL'                 ,
4739
            'c'           => 'C'                     ,
4740
            'C'           => 'C++'                   ,
4741
            'cc'          => 'C++'                   ,
4742
            'c++'         => 'C++'                   ,
4743
            'ccs'         => 'CCS'                   ,
4744
            'cfc'         => 'ColdFusion CFScript'   ,
4745
            'cfm'         => 'ColdFusion'            ,
4746
            'cl'          => 'Lisp/OpenCL'           ,
4747
            'clj'         => 'Clojure'               ,
4748
            'cljs'        => 'ClojureScript'         ,
4749
            'cls'         => 'Visual Basic'          , # also Apex Class
4750
            'CMakeLists.txt' => 'CMake'              ,
4751
            'cmake'       => 'CMake'                 ,
4752
            'cob'         => 'COBOL'                 ,
4753
            'COB'         => 'COBOL'                 ,
4754
            'coffee'      => 'CoffeeScript'          ,
4755
            'component'   => 'Visualforce Component' ,
4756
            'cpp'         => 'C++'                   ,
4757
            'cs'          => 'C#'                    ,
4758
            'csh'         => 'C Shell'               ,
4759
            'css'         => "CSS"                   ,
4760
            'ctl'         => 'Visual Basic'          ,
4761
            'cu'          => 'CUDA'                  ,
4762
            'cxx'         => 'C++'                   ,
4763
            'd'           => 'D/dtrace'              ,
4764
# in addition, .d can map to init.d files typically written as 
4765
# bash or sh scripts
4766
            'da'          => 'DAL'                   ,
4767
            'dart'        => 'Dart'                  ,
4768
            'def'         => 'Windows Module Definition',
4769
            'diff'        => 'diff'                  ,
4770
            'dmap'        => 'NASTRAN DMAP'          ,
4771
            'dpr'         => 'Pascal'                ,
4772
            'dita'        => 'DITA'                  ,
4773
            'dsr'         => 'Visual Basic'          ,
4774
            'dtd'         => 'DTD'                   ,
4775
            'ec'          => 'C'                     ,
4776
            'ecpp'        => 'ECPP'                  ,
4777
            'el'          => 'Lisp'                  ,
4778
            'exs'         => 'Elixir'                ,
4779
            'ex'          => 'Elixir'                ,
4780
            'erb'         => 'ERB'                   ,
4781
            'ERB'         => 'ERB'                   ,
4782
            'erl'         => 'Erlang'                ,
4783
            'exp'         => 'Expect'                ,
4784
            'f77'         => 'Fortran 77'            ,
4785
            'F77'         => 'Fortran 77'            ,
4786
            'f90'         => 'Fortran 90'            ,
4787
            'F90'         => 'Fortran 90'            ,
4788
            'f95'         => 'Fortran 95'            ,
4789
            'F95'         => 'Fortran 95'            ,
4790
            'f'           => 'Fortran 77'            ,
4791
            'F'           => 'Fortran 77'            ,
4792
            'for'         => 'Fortran 77'            ,
4793
            'FOR'         => 'Fortran 77'            ,
4794
            'ftn'         => 'Fortran 77'            ,
4795
            'FTN'         => 'Fortran 77'            ,
4796
            'fmt'         => 'Oracle Forms'          ,
4797
            'focexec'     => 'Focus'                 ,
4798
            'frm'         => 'Visual Basic'          ,
4799
            'fs'          => 'F#'                    ,
4800
            'fsi'         => 'F#'                    ,
4801
            'gnumakefile' => 'make'                  ,
4802
            'Gnumakefile' => 'make'                  ,
4803
            'go'          => 'Go'                    ,
4804
            'gsp'         => 'Grails'                ,
4805
            'groovy'      => 'Groovy'                ,
4806
            'gant'        => 'Groovy'                ,
4807
            'gradle'      => 'Groovy'                ,
4808
            'h'           => 'C/C++ Header'          ,
4809
            'H'           => 'C/C++ Header'          ,
4810
            'hh'          => 'C/C++ Header'          ,
4811
            'hpp'         => 'C/C++ Header'          ,
4812
            'hb'          => 'Harbour'               ,
4813
            'hrl'         => 'Erlang'                ,
4814
            'hs'          => 'Haskell'               , 
4815
            'hlsl'        => 'HLSL'                  ,
4816
            'shader'      => 'HLSL'                  ,
4817
            'cg'          => 'HLSL'                  ,
4818
            'cginc'       => 'HLSL'                  ,
4819
            'haml'        => 'Haml'                  ,
4820
            'handlebars'  => 'Handlebars'            ,
4821
            'hbs'         => 'Handlebars'            ,
4822
            'htm'         => 'HTML'                  ,
4823
            'html'        => 'HTML'                  ,
4824
            'i3'          => 'Modula3'               ,
4825
            'idl'         => 'IDL'                   ,
4826
            'ism'         => 'InstallShield'         ,
4827
            'pro'         => 'IDL/Qt Project/Prolog' ,
4828
            'ig'          => 'Modula3'               ,
4829
            'il'          => 'SKILL'                 ,
4830
            'ils'         => 'SKILL++'               ,
4831
            'inc'         => 'PHP/Pascal'            , # might be PHP or Pascal
4832
            'ino'         => 'Arduino Sketch'        ,
4833
            'pde'         => 'Arduino Sketch'        , # pre 1.0
4834
            'itk'         => 'Tcl/Tk'                ,
4835
            'java'        => 'Java'                  ,
4836
            'jcl'         => 'JCL'                   , # IBM Job Control Lang.
4837
            'jl'          => 'Lisp/Julia'            ,
4838
            'js'          => 'Javascript'            ,
4839
            'jsf'         => 'JavaServer Faces'      ,
4840
            'xhtml'       => 'JavaServer Faces'      ,
4841
            'json'        => 'JSON'                  ,
4842
            'jsp'         => 'JSP'                   , # Java server pages
4843
            'jspf'        => 'JSP'                   , # Java server pages
4844
            'vm'          => 'Velocity Template Language' ,
4845
            'ksc'         => 'Kermit'                ,
4846
            'ksh'         => 'Korn Shell'            ,
4847
            'kt'          => 'Kotlin'                ,
4848
            'lhs'         => 'Haskell'               ,
4849
            'l'           => 'lex'                   ,
4850
            'less'        => 'LESS'                  ,
4851
            'lsp'         => 'Lisp'                  ,
4852
            'lisp'        => 'Lisp'                  ,
4853
            'lua'         => 'Lua'                   ,
4854
            'm3'          => 'Modula3'               ,
4855
            'm4'          => 'm4'                    ,
4856
            'makefile'    => 'make'                  ,
4857
            'Makefile'    => 'make'                  ,
4858
            'mc'          => 'Windows Message File'  ,
4859
            'met'         => 'Teamcenter met'        ,
4860
            'mg'          => 'Modula3'               , 
4861
#           'mli'         => 'ML'                    , # ML not implemented
4862
#           'ml'          => 'ML'                    , 
4863
            'ml'          => 'OCaml'                 , 
4864
            'mli'         => 'OCaml'                 , 
4865
            'mly'         => 'OCaml'                 , 
4866
            'mll'         => 'OCaml'                 , 
4867
            'm'           => 'MATLAB/Objective C/MUMPS/Mercury' ,
4868
            'mm'          => 'Objective C++'         ,
4869
            'mustache'    => 'Mustache'              ,
4870
            'wdproj'      => 'MSBuild script'        ,
4871
            'csproj'      => 'MSBuild script'        ,
4872
            'vcproj'      => 'MSBuild script'        ,
4873
            'wixproj'     => 'MSBuild script'        ,
4874
            'vbproj'      => 'MSBuild script'        ,
4875
            'mps'         => 'MUMPS'                 ,
4876
            'mth'         => 'Teamcenter mth'        ,
4877
            'oscript'     => 'LiveLink OScript'      ,
4878
            'pad'         => 'Ada'                   , # Oracle Ada preprocessor
4879
            'page'        => 'Visualforce Page'      ,
4880
            'pas'         => 'Pascal'                ,
4881
            'pcc'         => 'C++'                   , # Oracle C++ preprocessor
4882
            'perl'        => 'Perl'                  ,
4883
            'pfo'         => 'Fortran 77'            ,
4884
            'pgc'         => 'C'                     , # Postgres embedded C/C++
4885
            'php3'        => 'PHP'                   ,
4886
            'php4'        => 'PHP'                   ,
4887
            'php5'        => 'PHP'                   ,
4888
            'php'         => 'PHP'                   ,
4889
            'pig'         => 'Pig Latin'             ,
4890
            'plh'         => 'Perl'                  ,
4891
            'pl'          => 'Perl/Prolog'           ,
4892
            'PL'          => 'Perl/Prolog'           ,
4893
            'plx'         => 'Perl'                  ,
4894
            'pm'          => 'Perl'                  ,
4895
            'pom.xml'     => 'Maven'                 ,
4896
            'pom'         => 'Maven'                 ,
4897
            'P'           => 'Prolog'                ,
4898
            'p'           => 'Pascal'                ,
4899
            'pp'          => 'Pascal/Puppet'         ,
4900
            'psql'        => 'SQL'                   ,
4901
            'py'          => 'Python'                ,
4902
            'pyx'         => 'Cython'                ,
4903
            'qml'         => 'QML'                   ,
4904
            'rb'          => 'Ruby'                  ,
4905
            'rake'        => 'Ruby'                  ,
4906
         #  'resx'        => 'ASP.Net'               ,
4907
            'rex'         => 'Oracle Reports'        ,
4908
            'rexx'        => 'Rexx'                  ,
4909
            'rhtml'       => 'Ruby HTML'             ,
4910
            'rs'          => 'Rust'                  ,
4911
            's'           => 'Assembly'              ,
4912
            'S'           => 'Assembly'              ,
4913
            'SCA'         => 'Visual Fox Pro'        ,
4914
            'sca'         => 'Visual Fox Pro'        ,
4915
            'scala'       => 'Scala'                 ,
4916
            'sbl'         => 'Softbridge Basic'      ,
4917
            'SBL'         => 'Softbridge Basic'      ,
4918
            'sc'          => 'Lisp'                  ,
4919
            'scm'         => 'Lisp'                  ,
4920
            'sed'         => 'sed'                   ,
4921
            'ses'         => 'Patran Command Language'   ,
4922
            'pcl'         => 'Patran Command Language'   ,
4923
            'pl1'         => 'PL/I'                  ,
4924
            'purs'        => 'PureScript'            ,
4925
            'prefab'      => 'Unity-Prefab'          ,
4926
            'proto'       => 'Protocol Buffers'      ,
4927
            'mat'         => 'Unity-Prefab'          ,
4928
            'ps1'         => 'PowerShell'            ,
4929
            'R'           => 'R'                     ,
4930
            'rkt'         => 'Racket'                ,
4931
            'rktl'        => 'Racket'                ,
4932
            'ss'          => 'Racket'                ,
4933
            'scm'         => 'Racket'                ,
4934
            'sch'         => 'Racket'                ,
4935
            'scrbl'       => 'Racket'                ,
4936
            'tsv'         => 'RobotFramework'        ,
4937
            'robot'       => 'RobotFramework'        ,
4938
            'rc'          => 'Windows Resource File' ,
4939
            'rc2'         => 'Windows Resource File' ,
4940
            'sas'         => 'SAS'                   ,
4941
            'sass'        => 'SASS'                  ,
4942
            'scss'        => 'SASS'                  ,
4943
            'sh'          => 'Bourne Shell'          ,
4944
            'smarty'      => 'Smarty'                ,
4945
            'sml'         => 'Standard ML'           ,
4946
            'sig'         => 'Standard ML'           ,
4947
            'fun'         => 'Standard ML'           ,
4948
            'sql'         => 'SQL'                   ,
4949
            'SQL'         => 'SQL'                   ,
4950
            'sproc.sql'   => 'SQL Stored Procedure'  ,
4951
            'spoc.sql'    => 'SQL Stored Procedure'  ,
4952
            'spc.sql'     => 'SQL Stored Procedure'  ,
4953
            'udf.sql'     => 'SQL Stored Procedure'  ,
4954
            'data.sql'    => 'SQL Data'              ,
4955
            'v'           => 'Verilog-SystemVerilog' ,
4956
            'sv'          => 'Verilog-SystemVerilog' ,
4957
            'svh'         => 'Verilog-SystemVerilog' ,
4958
            'tcl'         => 'Tcl/Tk'                ,
4959
            'tcsh'        => 'C Shell'               ,
4960
            'tk'          => 'Tcl/Tk'                ,
4961
            'tpl'         => 'Smarty'                ,
4962
            'trigger'     => 'Apex Trigger'          ,
4963
            'ts'          => 'TypeScript'            ,
4964
            'tss'         => 'Titanium Style Sheet'  ,
4965
            'vala'        => 'Vala'                  ,
4966
            'vapi'        => 'Vala Header'           ,
4967
            'vhd'         => 'VHDL'                  ,
4968
            'VHD'         => 'VHDL'                  ,
4969
            'vhdl'        => 'VHDL'                  ,
4970
            'VHDL'        => 'VHDL'                  ,
4971
            'vba'         => 'Visual Basic'          ,
4972
            'VBA'         => 'Visual Basic'          ,
4973
         #  'vbp'         => 'Visual Basic'          , # .vbp - autogenerated
4974
            'vb'          => 'Visual Basic'          ,
4975
            'VB'          => 'Visual Basic'          ,
4976
         #  'vbw'         => 'Visual Basic'          , # .vbw - autogenerated
4977
            'vbs'         => 'Visual Basic'          ,
4978
            'VBS'         => 'Visual Basic'          ,
4979
            'webinfo'     => 'ASP.Net'               ,
4980
            'xml'         => 'XML'                   ,
4981
            'XML'         => 'XML'                   ,
4982
            'mxml'        => 'MXML'                  ,
4983
            'build'       => 'NAnt script'           ,
4984
            'vim'         => 'vim script'            ,
4985
            'swift'       => 'Swift'                 ,
4986
            'xaml'        => 'XAML'                  ,
4987
            'wxs'         => 'WiX source'            ,
4988
            'wxi'         => 'WiX include'           ,
4989
            'wxl'         => 'WiX string localization' ,
4990
            'prg'         => 'xBase'                 ,
4991
            'ch'          => 'xBase Header'          ,
4992
            'xq'          => 'XQuery'                ,
4993
            'xquery'      => 'XQuery'                ,
4994
            'xsd'         => 'XSD'                   ,
4995
            'XSD'         => 'XSD'                   ,
4996
            'xslt'        => 'XSLT'                  ,
4997
            'XSLT'        => 'XSLT'                  ,
4998
            'xsl'         => 'XSLT'                  ,
4999
            'XSL'         => 'XSLT'                  ,
5000
            'y'           => 'yacc'                  ,
5001
            'yaml'        => 'YAML'                  ,
5002
            'yml'         => 'YAML'                  ,
5003
            );
5004
# 1}}}
5005
%{$rh_Language_by_Script}    = (             # {{{1
5006
            'awk'      => 'awk'                   ,
5007
            'bash'     => 'Bourne Again Shell'    ,
5008
            'bc'       => 'bc'                    ,# calculator
5009
            'csh'      => 'C Shell'               ,
5010
            'dmd'      => 'D'                     ,
5011
            'dtrace'   => 'dtrace'                ,
5012
            'idl'      => 'IDL'                   ,
5013
            'kermit'   => 'Kermit'                ,
5014
            'ksh'      => 'Korn Shell'            ,
5015
            'lua'      => 'Lua'                   ,
5016
            'make'     => 'make'                  ,
5017
            'octave'   => 'Octave'                ,
5018
            'perl5'    => 'Perl'                  ,
5019
            'perl'     => 'Perl'                  ,
5020
            'php'      => 'PHP'                   ,
5021
            'php5'     => 'PHP'                   ,
5022
            'python'   => 'Python'                ,
5023
            'python2.6'=> 'Python'                ,
5024
            'python2.7'=> 'Python'                ,
5025
            'python3'  => 'Python'                ,
5026
            'python3.3'=> 'Python'                ,
5027
            'python3.4'=> 'Python'                ,
5028
            'rexx'     => 'Rexx'                  ,
5029
            'regina'   => 'Rexx'                  ,
5030
            'ruby'     => 'Ruby'                  ,
5031
            'sed'      => 'sed'                   ,
5032
            'sh'       => 'Bourne Shell'          ,
5033
            'swipl'    => 'Prolog'                ,
5034
            'tcl'      => 'Tcl/Tk'                ,
5035
            'tclsh'    => 'Tcl/Tk'                ,
5036
            'tcsh'     => 'C Shell'               ,
5037
            'wish'     => 'Tcl/Tk'                ,
5038
            );
5039
# 1}}}
5040
%{$rh_Language_by_File}      = (             # {{{1
5041
            'Makefile'       => 'make'               ,
5042
            'makefile'       => 'make'               ,
5043
            'gnumakefile'    => 'make'               ,
5044
            'Gnumakefile'    => 'make'               ,
5045
            'CMakeLists.txt' => 'CMake'              ,
5046
            'build.xml'      => 'Ant/XML'            ,
5047
            'pom.xml'        => 'Maven/XML'          ,
5048
            'Rakefile'       => 'Ruby'               ,
5049
            'rakefile'       => 'Ruby'               ,
5050
            );
5051
# 1}}}
5052
%{$rhaa_Filters_by_Language} = (             # {{{1
5053
    '(unknown)'          => [ ],
5054
    'ABAP'               => [   [ 'remove_matches'      , '^\*'    ], ],
5055
    'ActionScript'       => [   
5056
                                [ 'remove_matches'      , '^\s*//' ], 
5057
                                [ 'call_regexp_common'  , 'C'      ],
5058
                            ],
5059
 
5060
    'ASP'                => [   [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
5061
    'ASP.Net'            => [   [ 'call_regexp_common'  , 'C'      ], ],
5062
    'Ada'                => [   [ 'remove_matches'      , '^\s*--' ], ],
5063
    'ADSO/IDSM'          => [   [ 'remove_matches'      , '^\s*\*[\+\!]' ], ],
5064
    'AMPLE'              => [   [ 'remove_matches'      , '^\s*//' ], ],
5065
    'Ant/XML'            => [
5066
                                [ 'remove_html_comments',          ],
5067
                                [ 'call_regexp_common'  , 'HTML'   ], 
5068
                            ],
5069
    'Ant'                => [
5070
                                [ 'remove_html_comments',          ],
5071
                                [ 'call_regexp_common'  , 'HTML'   ], 
5072
                            ],
5073
    'Apex Trigger'       => [
5074
                                [ 'remove_matches'      , '^\s*//' ],
5075
                                [ 'call_regexp_common'  , 'C'      ], 
5076
                                [ 'remove_inline'       , '//.*$'  ],
5077
                            ],
5078
    'Arduino Sketch'     => [   # same as C
5079
                                [ 'remove_matches'      , '^\s*//' ],
5080
                                [ 'call_regexp_common'  , 'C'      ], 
5081
                                [ 'remove_inline'       , '//.*$'  ],
5082
                            ], 
5083
    'Assembly'           => [  
5084
                                [ 'remove_matches'      , '^\s*//' ],
5085
                                [ 'remove_matches'      , '^\s*;'  ],
5086
                                [ 'remove_matches'      , '^\s*\@' ], 
5087
                                [ 'remove_matches'      , '^\s*\|' ], 
5088
                                [ 'remove_matches'      , '^\s*!'  ], 
5089
                                [ 'remove_matches'      , '^\s*#'  ], 
5090
                                [ 'remove_matches'      , '^\s*--' ], 
5091
                                [ 'call_regexp_common'  , 'C'      ], 
5092
                                [ 'remove_inline'       , '//.*$'  ], 
5093
                                [ 'remove_inline'       , ';.*$'   ], 
5094
                                [ 'remove_inline'       , '\@.*$'  ], 
5095
                                [ 'remove_inline'       , '\|.*$'  ], 
5096
                                [ 'remove_inline'       , '!.*$'   ], 
5097
                                [ 'remove_inline'       , '#.*$'   ], 
5098
                                [ 'remove_inline'       , '--.*$'  ], 
5099
                            ],
5100
    'AutoHotkey'         => [   
5101
                                [ 'remove_matches'      , '^\s*;'  ],
5102
                                [ 'remove_inline'       , ';.*$'   ],
5103
                            ],
5104
    'awk'                => [   
5105
                                [ 'remove_matches'      , '^\s*#'  ], 
5106
                                [ 'remove_inline'       , '#.*$'   ],
5107
                            ], 
5108
    'bc'                 => [   
5109
                                [ 'remove_matches'      , '^\s*#'  ], 
5110
                                [ 'remove_inline'       , '#.*$'   ],
5111
                            ], 
5112
    'Bourne Again Shell' => [   
5113
                                [ 'remove_matches'      , '^\s*#'  ], 
5114
                                [ 'remove_inline'       , '#.*$'   ],
5115
                            ], 
5116
    'Bourne Shell'       => [   
5117
                                [ 'remove_matches'      , '^\s*#'  ], 
5118
                                [ 'remove_inline'       , '#.*$'   ],
5119
                            ], 
5120
    'C'                  => [   
5121
                                [ 'remove_matches'      , '^\s*//' ], # C99
5122
                                [ 'call_regexp_common'  , 'C'      ], 
5123
                                [ 'remove_inline'       , '//.*$'  ], # C99
5124
                            ], 
5125
    'C++'                => [   
5126
                                [ 'remove_matches'      , '^\s*//' ], 
5127
                                [ 'remove_inline'       , '//.*$'  ], 
5128
                                [ 'call_regexp_common'  , 'C'      ],
5129
                            ],
5130
    'C/C++ Header'       => [   
5131
                                [ 'remove_matches'      , '^\s*//' ], 
5132
                                [ 'call_regexp_common'  , 'C'      ], 
5133
                                [ 'remove_inline'       , '//.*$'  ], 
5134
                            ],
5135
    'Clojure'            => [   [ 'remove_matches'      , '^\s*;'  ], ],
5136
    'ClojureScript'      => [   [ 'remove_matches'      , '^\s*;'  ], ],
5137
    'CMake'              => [   
5138
                                [ 'remove_matches'      , '^\s*#'  ],
5139
                                [ 'remove_inline'       , '#.*$'   ], 
5140
                            ],
5141
    'CUDA'               => [   
5142
                                [ 'remove_matches'      , '^\s*//' ], 
5143
                                [ 'remove_inline'       , '//.*$'  ], 
5144
                                [ 'call_regexp_common'  , 'C'      ],
5145
                            ],
5146
    'Cython'             => [   
5147
                                [ 'remove_matches'      , '^\s*#'  ], 
5148
                                [ 'docstring_to_C'                 ], 
5149
                                [ 'call_regexp_common'  , 'C'      ],
5150
                                [ 'remove_inline'       , '#.*$'   ],
5151
                            ], 
5152
    'C#'                 => [   
5153
                                [ 'remove_matches'      , '^\s*//' ], 
5154
                                [ 'call_regexp_common'  , 'C'      ],
5155
                                [ 'remove_inline'       , '//.*$'  ], 
5156
                            ],
5157
    'CCS'                => [   [ 'call_regexp_common'  , 'C'      ], ],
5158
    'CSS'                => [   [ 'call_regexp_common'  , 'C'      ], ],
5159
    'COBOL'              => [   [ 'remove_cobol_comments',         ], ],
5160
    'CoffeeScript'       => [   
5161
                                [ 'remove_matches'      , '^\s*#'  ],
5162
                                [ 'remove_inline'       , '#.*$'   ], 
5163
                            ],
5164
    'ColdFusion'         => [   [ 'remove_html_comments',          ],
5165
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5166
    'ColdFusion CFScript'=> [
5167
                                [ 'remove_matches'      , '^\s*//' ], 
5168
                                [ 'call_regexp_common'  , 'C'      ],
5169
                                [ 'remove_inline'       , '//.*$'  ], 
5170
                            ],
5171
    'Crystal Reports'    => [   [ 'remove_matches'      , '^\s*//' ], ],
5172
    'D/dtrace'           => [ [ 'die' ,          ], ], # never called
5173
    'D'                  => [   
5174
                                [ 'remove_matches'      , '^\s*//' ], 
5175
                                [ 'remove_between_general', '/+', '+/' ],
5176
                                [ 'call_regexp_common'  , 'C'      ],
5177
                                [ 'remove_inline'       , '//.*$'  ], 
5178
                            ],
5179
    'DAL'                => [
5180
                                [ 'remove_between_general', '[', ']', ],
5181
                            ],
5182
    'Dart'               => [   
5183
                                [ 'remove_matches'      , '^\s*//' ], 
5184
                                [ 'remove_inline'       , '//.*$'  ], 
5185
                                [ 'call_regexp_common'  , 'C'      ],
5186
                            ],
5187
    # diff is kind of weird: anything but a space in the first column
5188
    # will count as code, with the exception of #, ---, +++.  Spaces
5189
    # in the first column denote context lines which aren't part of the
5190
    # difference.
5191
    'diff'               => [   
5192
                                [ 'remove_matches'      , '^#' ], 
5193
                                [ 'remove_matches'      , '^\-\-\-' ], 
5194
                                [ 'remove_matches'      , '^\+\+\+' ], 
5195
                                [ 'remove_matches'      , '^\s' ], 
5196
                            ],
5197
    'DITA'               => [   
5198
                                [ 'remove_html_comments',          ],
5199
                                [ 'call_regexp_common'  , 'HTML'   ],
5200
                            ],
5201
    'DOORS Extension Language' => [
5202
                                [ 'remove_matches'      , '^\s*//' ], 
5203
                                [ 'remove_inline'       , '//.*$'  ], 
5204
                                [ 'call_regexp_common'  , 'C'      ],
5205
                            ],
5206
    'dtrace'             => [   
5207
                                [ 'remove_matches'      , '^\s*#'  ], 
5208
                                [ 'remove_inline'       , '#.*$'   ],
5209
                            ], 
5210
    'ECPP'               => [   
5211
                                [ 'remove_between_general', 
5212
                                  '<%doc>', '</%doc>',             ],
5213
                                [ 'remove_between_general', 
5214
                                  '<#'    , '#>'     ,             ],
5215
                                [ 'call_regexp_common'  , 'HTML'   ], 
5216
                            ],
5217
    'ERB'                => [   
5218
                                [ 'remove_between_general', '<%#', '%>' ],
5219
                            ],
5220
    'NASTRAN DMAP'       => [   
5221
                                [ 'remove_matches'      , '^\s*\$' ], 
5222
                                [ 'remove_inline'       , '\$.*$'  ], 
5223
                            ],
5224
    'DOS Batch'          => [   [ 'remove_matches'      , '^\s*rem', ], ],
5225
    'DTD'                => [   [ 'remove_html_comments',          ],
5226
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5227
    'Elixir'             => [
5228
                                [ 'remove_matches'      , '^\s*#'  ],
5229
                                [ 'remove_inline'       , '#.*$'   ],
5230
                            ],
5231
    'Erlang'             => [   
5232
                                [ 'remove_matches'      , '^\s*%'  ], 
5233
                                [ 'remove_inline'       , '%.*$'   ],
5234
                            ],
5235
    'Expect'             => [   
5236
                                [ 'remove_matches'      , '^\s*#'  ], 
5237
                                [ 'remove_inline'       , '#.*$'   ],
5238
                            ], 
5239
    'Focus'              => [   [ 'remove_matches'      , '^\s*\-\*'  ], ],
5240
    'Fortran 77'         => [   
5241
                                [ 'remove_f77_comments' ,          ], 
5242
                                [ 'remove_inline'       , '\!.*$'  ],
5243
                            ],
5244
    'Fortran 90'         => [   
5245
                                [ 'remove_f77_comments' ,          ],
5246
                                [ 'remove_f90_comments' ,          ], 
5247
                                [ 'remove_inline'       , '\!.*$'  ],
5248
                            ],
5249
    'Fortran 95'         => [   
5250
                                [ 'remove_f77_comments' ,          ],
5251
                                [ 'remove_f90_comments' ,          ], 
5252
                                [ 'remove_inline'       , '\!.*$'  ],
5253
                            ],
5254
    'F#'                 => [   
5255
                                [ 'call_regexp_common'  , 'Pascal' ], 
5256
                                [ 'remove_matches'      , '^\s*//' ],
5257
                            ],
5258
    'Go'                 => [   
5259
                                [ 'remove_matches'      , '^\s*//' ], 
5260
                                [ 'remove_inline'       , '//.*$'  ], 
5261
                                [ 'call_regexp_common'  , 'C'      ],
5262
                            ],
5263
    'Grails'             => [   
5264
                                [ 'remove_html_comments',          ],
5265
                                [ 'call_regexp_common'  , 'HTML'   ],
5266
                                [ 'remove_jsp_comments' ,          ], 
5267
                                [ 'remove_matches'      , '^\s*//' ],
5268
                                [ 'add_newlines'        ,          ],
5269
                                [ 'call_regexp_common'  , 'C'      ],
5270
                            ],
5271
    'Groovy'             => [   
5272
                                [ 'remove_matches'      , '^\s*//' ], 
5273
                                [ 'remove_inline'       , '//.*$'  ], 
5274
                                [ 'call_regexp_common'  , 'C'      ],
5275
                            ],
5276
    'Handlebars'         => [
5277
                                [ 'remove_between_general', '{{!--', '--}}' ],
5278
                                [ 'remove_between_general', '{{!', '}}' ],
5279
                                [ 'remove_html_comments',          ],
5280
                            ],
5281
    'Harbour'            => [
5282
                                [ 'remove_matches'      , '^\s*//' ], 
5283
                                [ 'remove_matches'      , '^\s*\&\&' ], 
5284
                                [ 'remove_matches'      , '^\s*\*' ], 
5285
                                [ 'remove_matches'      , '^\s*NOTE' ], 
5286
                                [ 'remove_matches'      , '^\s*note' ], 
5287
                                [ 'remove_matches'      , '^\s*Note' ], 
5288
                                [ 'remove_inline'       , '//.*$'  ], 
5289
                                [ 'remove_inline'       , '\&\&.*$' ], 
5290
                                [ 'call_regexp_common'  , 'C'      ],
5291
                            ],
5292
    'HLSL'               => [   
5293
                                [ 'remove_matches'      , '^\s*//' ], 
5294
                                [ 'remove_inline'       , '//.*$'  ], 
5295
                                [ 'call_regexp_common'  , 'C'      ],
5296
                            ],
5297
    'Haml'               => [   
5298
                                [ 'remove_haml_block'   ,          ], 
5299
                                [ 'remove_html_comments',          ],
5300
                                [ 'remove_matches'      , '^\s*/\s*\S+' ], 
5301
                                [ 'remove_matches'      , '^\s*-#\s*\S+' ], 
5302
                            ],
5303
    'HTML'               => [   
5304
                                [ 'remove_html_comments',          ],
5305
                                [ 'call_regexp_common'  , 'HTML'   ], 
5306
                            ],
5307
    'Haskell'            => [   [ 'remove_haskell_comments', '>filename<' ], ],
5308
    'IDL'                => [   [ 'remove_matches'      , '^\s*;'  ], ],
5309
    'IDL/Qt Project/Prolog' => [ [ 'die' ,          ], ], # never called
5310
    'InstallShield'      => [   [ 'remove_html_comments',          ],
5311
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5312
    'JSP'                => [   [ 'remove_html_comments',          ],
5313
                                [ 'call_regexp_common'  , 'HTML'   ],
5314
                                [ 'remove_jsp_comments' ,          ], 
5315
                                [ 'remove_matches'      , '^\s*//' ],
5316
                                [ 'add_newlines'        ,          ],
5317
                                [ 'call_regexp_common'  , 'C'      ],
5318
                            ],
5319
    'JavaServer Faces'   => [   
5320
                                [ 'remove_matches'      , '^\s*//' ], 
5321
                                [ 'call_regexp_common'  , 'C'      ],
5322
                                [ 'remove_inline'       , '//.*$'  ], 
5323
                            ],
5324
    'Java'               => [   
5325
                                [ 'remove_matches'      , '^\s*//' ], 
5326
                                [ 'call_regexp_common'  , 'C'      ],
5327
                                [ 'remove_inline'       , '//.*$'  ], 
5328
                            ],
5329
    'Javascript'         => [   
5330
                                [ 'remove_matches'      , '^\s*//' ], 
5331
                                [ 'call_regexp_common'  , 'C'      ],
5332
                                [ 'remove_inline'       , '//.*$'  ], 
5333
                            ],
5334
    'JCL'                => [   [ 'remove_jcl_comments' ,          ], ],
5335
    'JSON'               => [   # ECMA-404, the JSON standard definition
5336
                                # makes no provision for JSON comments
5337
                                # so just use a placeholder filter
5338
                                [ 'remove_matches'      , '^\s*$'  ], 
5339
                            ],
5340
    'Julia'              => [
5341
                                [ 'remove_matches'      , '^\s*#'  ], 
5342
                                [ 'remove_inline'       , '#.*$'   ],
5343
                                [ 'remove_between_general', '#=', '=#' ],
5344
                            ],
5345
    'Kotlin'             => [
5346
                                [ 'remove_matches'      , '^\s*//' ], 
5347
                                [ 'remove_inline'       , '//.*$'  ], 
5348
                                [ 'remove_between_general', '/*', '*/' ],
5349
                            ],
5350
    'LESS'               => [
5351
                                [ 'remove_matches'      , '^\s*//' ], 
5352
                                [ 'call_regexp_common'  , 'C'      ],
5353
                                [ 'remove_inline'       , '//.*$'  ], 
5354
                            ],
5355
    'Lisp'               => [   
5356
                                [ 'remove_matches'      , '^\s*;'  ], 
5357
                                [ 'remove_between_general', '#|', '|#' ],
5358
                            ],
5359
    'Lisp/OpenCL'        => [ [ 'die' ,          ], ], # never called
5360
    'Lisp/Julia'         => [ [ 'die' ,          ], ], # never called
5361
    'LiveLink OScript'   => [   [ 'remove_matches'      , '^\s*//' ], ],
5362
#   'Lua'                => [   [ 'call_regexp_common'  , 'lua'    ], ],
5363
    'Lua'                => [   [ 'remove_matches'      , '^\s*\-\-' ], ],
5364
    'make'               => [   
5365
                                [ 'remove_matches'      , '^\s*#'  ], 
5366
                                [ 'remove_inline'       , '#.*$'   ],
5367
                            ], 
5368
    'MATLAB'             => [   
5369
                                [ 'remove_matches'      , '^\s*%'  ], 
5370
                                [ 'remove_inline'       , '%.*$'   ],
5371
                            ], 
5372
    'Maven/XML'          => [
5373
                                [ 'remove_html_comments',          ],
5374
                                [ 'call_regexp_common'  , 'HTML'   ], 
5375
                            ],
5376
    'Maven'              => [
5377
                                [ 'remove_html_comments',          ],
5378
                                [ 'call_regexp_common'  , 'HTML'   ], 
5379
                            ],
5380
    'Mercury'            => [   
5381
                                [ 'remove_inline'       , '%.*$'   ],
5382
                                [ 'remove_matches'      , '^\s*%'  ], 
5383
                            ], 
5384
    'Modula3'            => [   [ 'call_regexp_common'  , 'Pascal' ], ],
5385
        # Modula 3 comments are (* ... *) so applying the Pascal filter
5386
        # which also treats { ... } as a comment is not really correct.
5387
    'Objective C'        => [   
5388
                                [ 'remove_matches'      , '^\s*//' ],
5389
                                [ 'call_regexp_common'  , 'C'      ], 
5390
                                [ 'remove_inline'       , '//.*$'  ], 
5391
                            ], 
5392
    'Objective C++'      => [   
5393
                                [ 'remove_matches'      , '^\s*//' ],
5394
                                [ 'call_regexp_common'  , 'C'      ], 
5395
                                [ 'remove_inline'       , '//.*$'  ], 
5396
                            ], 
5397
    'OCaml'              => [   
5398
                                [ 'call_regexp_common'  , 'Pascal' ], 
5399
                            ],
5400
    'OpenCL'             => [   
5401
                                [ 'remove_matches'      , '^\s*//' ], # C99
5402
                                [ 'call_regexp_common'  , 'C'      ], 
5403
                                [ 'remove_inline'       , '//.*$'  ], # C99
5404
                            ],
5405
    'PHP/Pascal'               => [ [ 'die' ,          ], ], # never called
5406
    'MATLAB/Objective C/MUMPS/Mercury' => [ [ 'die' ,          ], ], # never called
5407
    'MUMPS'              => [   [ 'remove_matches'      , '^\s*;'  ], ], 
5408
    'Mustache'           => [
5409
                                [ 'remove_between_general', '{{!', '}}' ],
5410
                            ],
5411
    'Octave'             => [   
5412
                                [ 'remove_matches'      , '^\s*#'  ], 
5413
                                [ 'remove_inline'       , '#.*$'   ],
5414
                            ], 
5415
    'Oracle Forms'       => [   [ 'call_regexp_common'  , 'C'      ], ],
5416
    'Oracle Reports'     => [   [ 'call_regexp_common'  , 'C'      ], ],
5417
    'Pascal'             => [
5418
                                [ 'remove_between_regex', '{[^$]', '}' ],
5419
                                [ 'remove_between_general', '(*', '*)' ],
5420
                                [ 'remove_matches'      , '^\s*//' ],
5421
                            ],
5422
####'Pascal'             => [   
5423
####                            [ 'call_regexp_common'  , 'Pascal' ], 
5424
####                            [ 'remove_matches'      , '^\s*//' ],
5425
####                        ],
5426
    'Pascal/Puppet'            => [ [ 'die' ,          ], ], # never called
5427
    'Puppet'             => [   
5428
                                [ 'remove_matches'      , '^\s*#'   ], 
5429
                                [ 'call_regexp_common'  , 'C'       ],
5430
                                [ 'remove_inline'       , '#.*$'   ],
5431
                            ],
5432
    'PureScript'         => [   
5433
                                [ 'remove_matches'      , '^\s*--' ],
5434
                                [ 'remove_between_general', '{-', '-}' ],
5435
                                [ 'remove_inline'       , '--.*$'  ],
5436
                            ],
5437
    'Patran Command Language'=> [   
5438
                                [ 'remove_matches'      , '^\s*#'   ], 
5439
                                [ 'remove_matches'      , '^\s*\$#' ], 
5440
                                [ 'call_regexp_common'  , 'C'       ],
5441
                            ],
5442
    'PL/I'               => [
5443
                                [ 'call_regexp_common'  , 'C'      ], 
5444
                            ],
5445
    'Perl'               => [   [ 'remove_below'        , '^__(END|DATA)__'],
5446
                                [ 'remove_matches'      , '^\s*#'  ], 
5447
                                [ 'remove_below_above'  , '^=head1', '^=cut'  ], 
5448
                                [ 'remove_inline'       , '#.*$'   ],
5449
                            ], 
5450
    'Perl/Prolog'        => [ [ 'die' ,          ], ], # never called
5451
    'Pig Latin'          => [   
5452
                                [ 'remove_matches'      , '^\s*--' ],
5453
                                [ 'remove_inline'       , '--.*$'  ],
5454
                                [ 'call_regexp_common'  , 'C'       ],
5455
                            ],
5456
    'PowerShell'         => [ 
5457
                                [ 'powershell_to_C'                ], 
5458
                                [ 'call_regexp_common'  , 'C'      ],
5459
                                [ 'remove_matches'      , '^\s*#'  ], 
5460
                                [ 'remove_inline'       , '#.*$'   ],
5461
                            ], 
5462
    'Prolog'             => [   
5463
                                [ 'remove_matches'      , '^\s*\%' ],
5464
                                [ 'call_regexp_common'  , 'C'      ],
5465
                                [ 'remove_inline'       , '(//|\%).*$' ], 
5466
                            ],
5467
    'Protocol Buffers'   => [   
5468
                                [ 'remove_matches'      , '^\s*//' ], 
5469
                                [ 'remove_inline'       , '//.*$'  ], 
5470
                                [ 'call_regexp_common'  , 'C'      ],
5471
                            ],
5472
    'Python'             => [   
5473
                                [ 'remove_matches'      , '^\s*#'  ], 
5474
                                [ 'docstring_to_C'                 ], 
5475
                                [ 'call_regexp_common'  , 'C'      ],
5476
                                [ 'remove_inline'       , '#.*$'   ],
5477
                            ], 
5478
    'PHP'                => [   
5479
                                [ 'remove_matches'      , '^\s*#'  ],
5480
                                [ 'remove_matches'      , '^\s*//' ], 
5481
                                [ 'call_regexp_common'  , 'C'      ], 
5482
                                [ 'remove_inline'       , '#.*$'   ],
5483
                                [ 'remove_inline'       , '//.*$'  ],
5484
                            ],
5485
    'QML'                => [   
5486
                                [ 'remove_matches'      , '^\s*//' ], 
5487
                                [ 'call_regexp_common'  , 'C'      ],
5488
                                [ 'remove_inline'       , '//.*$'  ], 
5489
                            ],
5490
    'Qt Project'         => [   
5491
                                [ 'remove_matches'      , '^\s*#'  ], 
5492
                                [ 'remove_inline'       , '#.*$'   ],
5493
                            ],
5494
    'R'                  => [   
5495
                                [ 'remove_matches'      , '^\s*#'  ], 
5496
                                [ 'remove_inline'       , '#.*$'   ],
5497
                            ], 
5498
    'Racket'             => [   
5499
                                [ 'remove_matches'      , '^\s*;'  ], 
5500
                                [ 'remove_inline'       , ';.*$'   ],
5501
                            ], 
5502
    'Razor'              => [
5503
                                [ 'remove_matches'      , '^\s*//' ],
5504
                                [ 'remove_between_general', '@*', '*@' ],
5505
                                [ 'call_regexp_common'  , 'C'      ], 
5506
                                [ 'remove_inline'       , '//.*$'  ],
5507
                            ], 
5508
    'RobotFramework'     => [   
5509
                                [ 'remove_matches'      , '^\s*#'   ], 
5510
                                [ 'remove_matches'      , '^\s*Comment' ], 
5511
                                [ 'remove_matches'      , '^\s*\*{3}\s+(Variables|Test\s+Cases|Settings|Keywords)\s+\*{3}' ] ,
5512
                                [ 'remove_matches'      , '^\s*\[(Documentation|Tags)\]' ],
5513
                                [ 'remove_inline'       , '#.*$'   ],
5514
                            ],
5515
    'Rexx'               => [   [ 'call_regexp_common'  , 'C'      ], ],
5516
    'Ruby'               => [   
5517
                                [ 'remove_matches'      , '^\s*#'  ], 
5518
                                [ 'remove_below_above'  , '^=begin', '^=end' ], 
5519
                                [ 'remove_inline'       , '#.*$'   ],
5520
                            ], 
5521
    'Ruby HTML'          => [   [ 'remove_html_comments',          ],
5522
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5523
    'Rust'               => [   
5524
                                [ 'remove_matches'      , '^\s*//' ], 
5525
                                [ 'remove_inline'       , '//.*$'  ], 
5526
                                [ 'call_regexp_common'  , 'C'      ],
5527
                            ],
5528
    'SAS'                => [   
5529
                                [ 'call_regexp_common'  , 'C'      ],
5530
                                [ 'remove_between_general', '*', ';' ],
5531
                            ],
5532
    'SASS'               => [   
5533
                                [ 'remove_matches'      , '^\s*//' ], 
5534
                                [ 'remove_inline'       , '//.*$'  ], 
5535
                            ],
5536
    'Scala'              => [   
5537
                                [ 'remove_matches'      , '^\s*//' ], 
5538
                                [ 'remove_inline'       , '//.*$'  ], 
5539
                                [ 'call_regexp_common'  , 'C'      ],
5540
                            ],
5541
    'SKILL'              => [   
5542
                                [ 'call_regexp_common'  , 'C'      ], 
5543
                                [ 'remove_matches'      , '^\s*;'  ],
5544
                            ],
5545
    'SKILL++'            => [   
5546
                                [ 'call_regexp_common'  , 'C'      ], 
5547
                                [ 'remove_matches'      , '^\s*;'  ],
5548
                            ],
5549
    'SQL'                => [   
5550
                                [ 'call_regexp_common'  , 'C'      ], 
5551
                                [ 'remove_matches'      , '^\s*--' ],
5552
                                [ 'remove_inline'       , '--.*$'  ],
5553
                            ],
5554
    'SQL Stored Procedure'=> [   
5555
                                [ 'call_regexp_common'  , 'C'      ], 
5556
                                [ 'remove_matches'      , '^\s*--' ],
5557
                                [ 'remove_inline'       , '--.*$'  ],
5558
                            ],
5559
    'SQL Data'           => [   
5560
                                [ 'call_regexp_common'  , 'C'      ], 
5561
                                [ 'remove_matches'      , '^\s*--' ],
5562
                                [ 'remove_inline'       , '--.*$'  ],
5563
                            ],
5564
    'sed'                => [   
5565
                                [ 'remove_matches'      , '^\s*#'  ], 
5566
                                [ 'remove_inline'       , '#.*$'   ],
5567
                            ], 
5568
    'Smarty'             => [   
5569
                                [ 'smarty_to_C'                    ], 
5570
                                [ 'call_regexp_common'  , 'C'      ],
5571
                            ], 
5572
    'Standard ML'        => [   
5573
                                [ 'remove_between_general', '(*', '*)' ],
5574
                            ], 
5575
    'Swift'              => [   
5576
                                [ 'remove_matches'      , '^\s*//' ],
5577
                                [ 'call_regexp_common'  , 'C'      ],
5578
                                [ 'remove_inline'       , '//.*$'  ],
5579
                            ],
5580
 
5581
    'm4'                 => [   [ 'remove_matches'      , '^dnl '  ], ], 
5582
    'C Shell'            => [   
5583
                                [ 'remove_matches'      , '^\s*#'  ], 
5584
                                [ 'remove_inline'       , '#.*$'   ],
5585
                            ], 
5586
    'Kermit'             => [  
5587
                                [ 'remove_matches'      , '^\s*#'  ], 
5588
                                [ 'remove_matches'      , '^\s*;'  ], 
5589
                                [ 'remove_inline'       , '#.*$'   ],
5590
                            ], 
5591
    'Korn Shell'         => [   
5592
                                [ 'remove_matches'      , '^\s*#'  ], 
5593
                                [ 'remove_inline'       , '#.*$'   ],
5594
                            ], 
5595
    'Tcl/Tk'             => [   
5596
                                [ 'remove_matches'      , '^\s*#'  ], 
5597
                                [ 'remove_inline'       , '#.*$'   ],
5598
                            ], 
5599
    'Teamcenter met'     => [   [ 'call_regexp_common'  , 'C'      ], ],
5600
    'Teamcenter mth'     => [   [ 'remove_matches'      , '^\s*#'  ], ], 
5601
    'Titanium Style Sheet'  => [
5602
                                [ 'remove_matches'      , '^\s*//' ], 
5603
                                [ 'remove_inline'       , '//.*$'  ], 
5604
                                [ 'remove_between_regex', '/[^/]', '[^/]/' ],
5605
                            ],
5606
    'TypeScript'         => [   
5607
                                [ 'remove_matches'      , '^\s*//' ], 
5608
                                [ 'remove_inline'       , '//.*$'  ], 
5609
                                [ 'call_regexp_common'  , 'C'      ],
5610
                            ],
5611
    'Unity-Prefab'       => [   
5612
                                [ 'remove_matches'      , '^\s*#'  ], 
5613
                                [ 'remove_inline'       , '#.*$'   ], 
5614
                            ],
5615
    'Visual Fox Pro'     =>  [
5616
                                [ 'remove_matches'      , '^\s*\*' ],
5617
                                [ 'remove_inline'       , '\*.*$'  ],
5618
                                [ 'remove_matches'      , '^\s*&&' ],
5619
                                [ 'remove_inline'       , '&&.*$'  ],
5620
                            ],
5621
    'Softbridge Basic'   => [   [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],               
5622
                                [ 'remove_matches'      , '^\s*Attribute\s+'],
5623
                                [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
5624
    # http://www.altium.com/files/learningguides/TR0114%20VHDL%20Language%20Reference.pdf
5625
    'Vala'               => [   
5626
                                [ 'remove_matches'      , '^\s*//' ], 
5627
                                [ 'call_regexp_common'  , 'C'      ],
5628
                                [ 'remove_inline'       , '//.*$'  ], 
5629
                            ],
5630
    'Vala Header'        => [   
5631
                                [ 'remove_matches'      , '^\s*//' ], 
5632
                                [ 'call_regexp_common'  , 'C'      ],
5633
                                [ 'remove_inline'       , '//.*$'  ], 
5634
                            ],
5635
    'Verilog-SystemVerilog' => [
5636
                                [ 'remove_matches'      , '^\s*//' ], 
5637
                                [ 'remove_inline'       , '//.*$'  ], 
5638
                                [ 'call_regexp_common'  , 'C'      ],
5639
                            ],
5640
    'VHDL'               => [   
5641
                                [ 'remove_matches'      , '^\s*--' ],
5642
                                [ 'remove_matches'      , '^\s*//' ], 
5643
                                [ 'call_regexp_common'  , 'C'      ], 
5644
                                [ 'remove_inline'       , '--.*$'  ],
5645
                                [ 'remove_inline'       , '//.*$'  ], 
5646
                            ],
5647
    'vim script'         => [   
5648
                                [ 'remove_matches'      , '^\s*"'  ], 
5649
                                [ 'remove_inline'       , '".*$'   ], 
5650
                            ],
5651
    'Visual Basic'       => [   [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],               
5652
                                [ 'remove_matches'      , '^\s*Attribute\s+'],
5653
                                [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
5654
    'Visualforce Component' => [
5655
                                [ 'remove_html_comments',          ],
5656
                                [ 'call_regexp_common'  , 'HTML'   ], 
5657
                            ],
5658
    'Visualforce Page'   => [
5659
                                [ 'remove_html_comments',          ],
5660
                                [ 'call_regexp_common'  , 'HTML'   ], 
5661
                            ],
5662
    'Velocity Template Language' => [
5663
                                [ 'remove_html_comments',          ],
5664
                                [ 'call_regexp_common'  , 'HTML'   ],
5665
                                [ 'remove_jsp_comments' ,          ], 
5666
                                [ 'remove_matches'      , '^\s*//' ],
5667
                                [ 'add_newlines'        ,          ],
5668
                                [ 'call_regexp_common'  , 'C'      ],
5669
                            ],                            
5670
    'Teamcenter def'     => [   [ 'remove_matches'      , '^\s*#'  ], ], 
5671
    'Windows Module Definition' => [
5672
                                [ 'remove_matches'      , '^\s*;' ],
5673
                                [ 'remove_inline'       , ';.*$'  ], 
5674
                            ],                            
5675
    'yacc'               => [   
5676
                                [ 'call_regexp_common'  , 'C'      ], 
5677
                                [ 'remove_matches'      , '^\s*//' ], 
5678
                                [ 'remove_inline'       , '//.*$'  ], 
5679
                            ],
5680
    'YAML'               => [   
5681
                                [ 'remove_matches'      , '^\s*#'  ], 
5682
                                [ 'remove_inline'       , '#.*$'   ], 
5683
                            ],
5684
    'lex'                => [   [ 'call_regexp_common'  , 'C'      ], ],
5685
    'XAML'               => [   [ 'remove_html_comments',          ],
5686
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5687
    'xBase Header'       => [
5688
                                [ 'remove_matches'      , '^\s*//' ], 
5689
                                [ 'remove_matches'      , '^\s*\&\&' ], 
5690
                                [ 'remove_matches'      , '^\s*\*' ], 
5691
                                [ 'remove_matches'      , '^\s*NOTE' ], 
5692
                                [ 'remove_matches'      , '^\s*note' ], 
5693
                                [ 'remove_matches'      , '^\s*Note' ], 
5694
                                [ 'remove_inline'       , '//.*$'  ], 
5695
                                [ 'remove_inline'       , '\&\&.*$' ], 
5696
                                [ 'call_regexp_common'  , 'C'      ],
5697
                            ],
5698
    'xBase'              => [
5699
                                [ 'remove_matches'      , '^\s*//' ], 
5700
                                [ 'remove_matches'      , '^\s*\&\&' ], 
5701
                                [ 'remove_matches'      , '^\s*\*' ], 
5702
                                [ 'remove_matches'      , '^\s*NOTE' ], 
5703
                                [ 'remove_matches'      , '^\s*note' ], 
5704
                                [ 'remove_matches'      , '^\s*Note' ], 
5705
                                [ 'remove_inline'       , '//.*$'  ], 
5706
                                [ 'remove_inline'       , '\&\&.*$' ], 
5707
                                [ 'call_regexp_common'  , 'C'      ],
5708
                            ],
5709
    'MXML'               => [   
5710
                                [ 'remove_html_comments',          ],
5711
                                [ 'call_regexp_common'  , 'HTML'   ], 
5712
                                [ 'remove_matches'      , '^\s*//' ], 
5713
                                [ 'add_newlines'        ,          ], 
5714
                                [ 'call_regexp_common'  , 'C'      ], 
5715
                            ],
5716
    'Windows Message File'  => [
5717
                                [ 'remove_matches'      , '^\s*;\s*//' ], 
5718
                                [ 'call_regexp_common'  , 'C'          ], 
5719
                                [ 'remove_matches'      , '^\s*;\s*$'  ], 
5720
#                               next line only hypothetical
5721
#                               [ 'remove_matches_2re'  , '^\s*;\s*/\*',
5722
#                                                         '^\s*;\s*\*/', ],
5723
                            ],
5724
    'Windows Resource File' => [
5725
                                [ 'remove_matches'      , '^\s*//' ], 
5726
                                [ 'remove_inline'       , '//.*$'  ], 
5727
                                [ 'call_regexp_common'  , 'C'      ],
5728
                            ],
5729
    'WiX source'         => [
5730
                                [ 'remove_html_comments',          ],
5731
                                [ 'call_regexp_common'  , 'HTML'   ],
5732
                            ],
5733
    'WiX include'        => [
5734
                                [ 'remove_html_comments',          ],
5735
                                [ 'call_regexp_common'  , 'HTML'   ],
5736
                            ],
5737
    'WiX string localization' => [
5738
                                [ 'remove_html_comments',          ],
5739
                                [ 'call_regexp_common'  , 'HTML'   ],
5740
                            ],
5741
    'XML'                => [   
5742
                                [ 'remove_html_comments',          ],
5743
                                [ 'call_regexp_common'  , 'HTML'   ], 
5744
                            ],
5745
    'XQuery'             => [
5746
                                [ 'remove_between_general', '(:', ':)' ],
5747
                            ],
5748
    'XSD'                => [   [ 'remove_html_comments',          ],
5749
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5750
    'XSLT'               => [   [ 'remove_html_comments',          ],
5751
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5752
    'NAnt script'       => [   [ 'remove_html_comments',          ],
5753
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5754
    'MSBuild script'    => [   [ 'remove_html_comments',          ],
5755
                                [ 'call_regexp_common'  , 'HTML'   ], ],
5756
    );
5757
# 1}}}
5758
%{$rh_EOL_continuation_re} = (               # {{{1
5759
    'ActionScript'       =>     '\\\\$'         ,
5760
    'Assembly'           =>     '\\\\$'         ,
5761
    'ASP'                =>     '\\\\$'         ,
5762
    'ASP.Net'            =>     '\\\\$'         ,
5763
    'Ada'                =>     '\\\\$'         ,
5764
    'awk'                =>     '\\\\$'         ,
5765
    'bc'                 =>     '\\\\$'         ,
5766
    'C'                  =>     '\\\\$'         ,
5767
    'C++'                =>     '\\\\$'         ,
5768
    'C/C++ Header'       =>     '\\\\$'         ,
5769
    'CMake'              =>     '\\\\$'         ,
5770
    'Cython'             =>     '\\\\$'         ,
5771
    'C#'                 =>     '\\\\$'         ,
5772
    'D'                  =>     '\\\\$'         ,
5773
    'Dart'               =>     '\\\\$'         ,
5774
    'Expect'             =>     '\\\\$'         ,
5775
    'Go'                 =>     '\\\\$'         ,
5776
    'IDL'                =>     '\$\\$'         ,
5777
    'Java'               =>     '\\\\$'         ,
5778
    'Javascript'         =>     '\\\\$'         ,
5779
    'LESS'               =>     '\\\\$'         ,
5780
    'Lua'                =>     '\\\\$'         ,
5781
    'make'               =>     '\\\\$'         ,
5782
    'MATLAB'             =>     '\.\.\.\s*$'    ,
5783
    'MXML'               =>     '\\\\$'         ,
5784
    'Objective C'        =>     '\\\\$'         ,
5785
    'Objective C++'      =>     '\\\\$'         ,
5786
    'OCaml'              =>     '\\\\$'         ,
5787
    'Octave'             =>     '\.\.\.\s*$'    ,
5788
    'Qt Project'         =>     '\\\\$'         ,
5789
    'Patran Command Language'=> '\\\\$'         ,
5790
    'PowerShell'         =>     '\\\\$'         ,
5791
    'Python'             =>     '\\\\$'         ,
5792
    'R'                  =>     '\\\\$'         ,
5793
    'Ruby'               =>     '\\\\$'         ,
5794
    'sed'                =>     '\\\\$'         ,
5795
    'Swift'              =>     '\\\\$'         ,
5796
    'Bourne Again Shell' =>     '\\\\$'         ,
5797
    'Bourne Shell'       =>     '\\\\$'         ,
5798
    'C Shell'            =>     '\\\\$'         ,
5799
    'Kermit'             =>     '\\\\$'         ,
5800
    'Korn Shell'         =>     '\\\\$'         ,
5801
    'Tcl/Tk'             =>     '\\\\$'         ,
5802
    'TypeScript'         =>     '\\\\$'         ,
5803
    'lex'                =>     '\\\\$'         ,
5804
    'Vala'               =>     '\\\\$'         ,
5805
    'Vala Header'        =>     '\\\\$'         ,
5806
    );
5807
# 1}}}
5808
%{$rh_Not_Code_Extension}    = (             # {{{1
5809
   '1'       => 1,  # Man pages (documentation):
5810
   '2'       => 1,
5811
   '3'       => 1,
5812
   '4'       => 1,
5813
   '5'       => 1,
5814
   '6'       => 1,
5815
   '7'       => 1,
5816
   '8'       => 1,
5817
   '9'       => 1,
5818
   'a'       => 1,  # Static object code.
5819
   'ad'      => 1,  # X application default resource file.
5820
   'afm'     => 1,  # font metrics
5821
   'arc'     => 1,  # arc(1) archive
5822
   'arj'     => 1,  # arj(1) archive
5823
   'au'      => 1,  # Audio sound filearj(1) archive
5824
   'bak'     => 1,  # Backup files - we only want to count the "real" files.
5825
   'bdf'     => 1,
5826
   'bmp'     => 1,
5827
   'bz2'     => 1,  # bzip2(1) compressed file
5828
   'csv'     => 1,  # comma separated values
5829
   'desktop' => 1,
5830
   'dic'     => 1,
5831
   'doc'     => 1,
5832
   'elc'     => 1,
5833
   'eps'     => 1,
5834
   'fig'     => 1,
5835
   'gif'     => 1,
5836
   'gz'      => 1,
5837
   'hdf'     => 1,  # hierarchical data format
5838
   'in'      => 1,  # Debatable.
5839
   'jpg'     => 1,
5840
   'kdelnk'  => 1,
5841
   'man'     => 1,
5842
   'mf'      => 1,
5843
   'mp3'     => 1,
5844
   'n'       => 1,
5845
   'o'       => 1,  # Object code is generated from source code.
5846
   'pbm'     => 1,
5847
   'pdf'     => 1,
5848
   'pfb'     => 1,
5849
   'png'     => 1,
5850
   'po'      => 1,
5851
   'ps'      => 1,  # Postscript is _USUALLY_ generated automatically.
5852
   'sgm'     => 1,
5853
   'sgml'    => 1,
5854
   'so'      => 1,  # Dynamically-loaded object code.
5855
   'Tag'     => 1,
5856
   'tex'     => 1,
5857
   'text'    => 1,
5858
   'tfm'     => 1,
5859
   'tgz'     => 1,  # gzipped tarball
5860
   'tiff'    => 1,
5861
   'txt'     => 1, 
5862
   'vf'      => 1,
5863
   'wav'     => 1,
5864
   'xbm'     => 1,
5865
   'xpm'     => 1,
5866
   'Y'       => 1,  # file compressed with "Yabba"
5867
   'Z'       => 1,  # file compressed with "compress"
5868
   'zip'     => 1,  # zip archive
5869
); # 1}}}
5870
%{$rh_Not_Code_Filename}     = (             # {{{1
5871
   'AUTHORS'     => 1,
5872
   'BUGS'        => 1,
5873
   'BUGS'        => 1,
5874
   'Changelog'   => 1,
5875
   'ChangeLog'   => 1,
5876
   'ChangeLog'   => 1,
5877
   'Changes'     => 1,
5878
   'CHANGES'     => 1,
5879
   'COPYING'     => 1,
5880
   'COPYING'     => 1,
5881
   '.cvsignore'  => 1,
5882
   'Entries'     => 1,
5883
   'FAQ'         => 1,
5884
   'iconfig.h'   => 1, # Skip "iconfig.h" files; they're used in Imakefiles.
5885
   'INSTALL'     => 1,
5886
   'MAINTAINERS' => 1,
5887
   'MD5SUMS'     => 1,
5888
   'NEWS'        => 1,
5889
   'readme'      => 1,
5890
   'Readme'      => 1,
5891
   'README'      => 1,
5892
   'README.tk'   => 1, # used in kdemultimedia, it's confusing.
5893
   'Repository'  => 1,
5894
   'Root'        => 1, # CVS
5895
   'TODO'        => 1,
5896
);
5897
# 1}}}
5898
%{$rh_Scale_Factor}          = (             # {{{1
5899
    '(unknown)'                    =>   0.00,
5900
    '1032/af'                      =>   5.00,
5901
    '1st generation default'       =>   0.25,
5902
    '2nd generation default'       =>   0.75,
5903
    '3rd generation default'       =>   1.00,
5904
    '4th generation default'       =>   4.00,
5905
    '5th generation default'       =>  16.00,
5906
    'aas macro'                    =>   0.88,
5907
    'abap/4'                       =>   5.00,
5908
    'ABAP'                         =>   5.00,
5909
    'accel'                        =>   4.21,
5910
    'access'                       =>   2.11,
5911
    'ActionScript'                 =>   1.36,
5912
    'actor'                        =>   3.81,
5913
    'acumen'                       =>   2.86,
5914
    'Ada'                          =>   0.52,
5915
    'Ada 83'                       =>   1.13,
5916
    'Ada 95'                       =>   1.63,
5917
    'adr/dl'                       =>   2.00,
5918
    'adr/ideal/pdl'                =>   4.00,
5919
    'ads/batch'                    =>   4.00,
5920
    'ads/online'                   =>   4.00,
5921
    'ADSO/IDSM'                    =>   3.00,
5922
    'advantage'                    =>   2.11,
5923
    'ai shell default'             =>   1.63,
5924
    'ai shells'                    =>   1.63,
5925
    'algol 68'                     =>   0.75,
5926
    'algol w'                      =>   0.75,
5927
    'ambush'                       =>   2.50,
5928
    'aml'                          =>   1.63,
5929
    'AMPLE'                        =>   2.00,
5930
    'Ant/XML'                      =>   1.90,
5931
    'Ant'                          =>   1.90,
5932
    'amppl ii'                     =>   1.25,
5933
    'ansi basic'                   =>   1.25,
5934
    'ansi cobol 74'                =>   0.75,
5935
    'ansi cobol 85'                =>   0.88,
5936
    'SQL'                          =>   6.15,
5937
    'SQL Stored Procedure'         =>   6.15,
5938
    'SQL Data'                     =>   1.00,
5939
    'answer/db'                    =>   6.15,
5940
    'apl 360/370'                  =>   2.50,
5941
    'apl default'                  =>   2.50,
5942
    'apl*plus'                     =>   2.50,
5943
    'applesoft basic'              =>   0.63,
5944
    'application builder'          =>   4.00,
5945
    'application manager'          =>   2.22,
5946
    'aps'                          =>   0.96,
5947
    'aps'                          =>   4.71,
5948
    'apt'                          =>   1.13,
5949
    'aptools'                      =>   4.00,
5950
    'arc'                          =>   1.63,
5951
    'ariel'                        =>   0.75,
5952
    'arity'                        =>   1.63,
5953
    'art'                          =>   1.63,
5954
    'art enterprise'               =>   1.74,
5955
    'artemis'                      =>   2.00,
5956
    'artim'                        =>   1.74,
5957
    'as/set'                       =>   4.21,
5958
    'asi/inquiry'                  =>   6.15,
5959
    'ask windows'                  =>   1.74,
5960
    'asa'                          =>   1.29,
5961
    'ASP'                          =>   1.29,
5962
    'ASP.Net'                      =>   1.29,
5963
    'aspx'                         =>   1.29,
5964
    'asax'                         =>   1.29,
5965
    'ascx'                         =>   1.29,
5966
    'asmx'                         =>   1.29,
5967
    'config'                       =>   1.29,
5968
    'webinfo'                      =>   1.29,
5969
    'CCS'                          =>   5.33,
5970
    'Apex Trigger'                 =>   1.4 ,
5971
    'Arduino Sketch'               =>   1.00,
5972
    'Assembly'                     =>   0.25,
5973
    'Assembly (macro)'             =>   0.51,
5974
    'associative default'          =>   1.25,
5975
    'autocoder'                    =>   0.25,
5976
    'AutoHotkey'                   =>   1.29,
5977
    'awk'                          =>   3.81,
5978
    'aztec c'                      =>   0.63,
5979
    'balm'                         =>   0.75,
5980
    'base sas'                     =>   1.51,
5981
    'basic'                        =>   0.75,
5982
    'basic a'                      =>   0.63,
5983
    'bc'                           =>   1.50,
5984
    'berkeley pascal'              =>   0.88,
5985
    'better basic'                 =>   0.88,
5986
    'bliss'                        =>   0.75,
5987
    'bmsgen'                       =>   2.22,
5988
    'boeingcalc'                   =>  13.33,
5989
    'bteq'                         =>   6.15,
5990
    'C'                            =>   0.77,
5991
    'c set 2'                      =>   0.88,
5992
    'C#'                           =>   1.36,
5993
    'C++'                          =>   1.51,
5994
    'c86plus'                      =>   0.63,
5995
    'cadbfast'                     =>   2.00,
5996
    'caearl'                       =>   2.86,
5997
    'cast'                         =>   1.63,
5998
    'cbasic'                       =>   0.88,
5999
    'cdadl'                        =>   4.00,
6000
    'cellsim'                      =>   1.74,
6001
    'ColdFusion'                   =>   4.00,
6002
    'ColdFusion CFScript'          =>   4.00,
6003
    'chili'                        =>   0.75,
6004
    'chill'                        =>   0.75,
6005
    'cics'                         =>   1.74,
6006
    'clarion'                      =>   1.38,
6007
    'clascal'                      =>   1.00,
6008
    'cli'                          =>   2.50,
6009
    'clipper'                      =>   2.05,
6010
    'clipper db'                   =>   2.00,
6011
    'clos'                         =>   3.81,
6012
    'Clojure'                      =>   1.25,
6013
    'ClojureScript'                =>   1.25,
6014
    'clout'                        =>   2.00,
6015
    'CMake'                        =>   1.00,
6016
    'cms2'                         =>   0.75,
6017
    'cmsgen'                       =>   4.21,
6018
    'COBOL'                        =>   1.04,
6019
    'COBOL ii'                     =>   0.75,
6020
    'COBOL/400'                    =>   0.88,
6021
    'cobra'                        =>   4.00,
6022
    'codecenter'                   =>   2.22,
6023
    'cofac'                        =>   2.22,
6024
    'CoffeeScript'                 =>   2.00,
6025
    'cogen'                        =>   2.22,
6026
    'cognos'                       =>   2.22,
6027
    'cogo'                         =>   1.13,
6028
    'comal'                        =>   1.00,
6029
    'comit ii'                     =>   1.25,
6030
    'common lisp'                  =>   1.25,
6031
    'concurrent pascal'            =>   1.00,
6032
    'conniver'                     =>   1.25,
6033
    'cool:gen/ief'                 =>   2.58,
6034
    'coral 66'                     =>   0.75,
6035
    'corvet'                       =>   4.21,
6036
    'corvision'                    =>   5.33,
6037
    'cpl'                          =>   0.50,
6038
    'Crystal Reports'              =>   4.00,
6039
    'csl'                          =>   1.63,
6040
    'csp'                          =>   1.51,
6041
    'cssl'                         =>   1.74,
6042
    'CSS'                          => 1.0,
6043
    'culprit'                      =>   1.57,
6044
    'CUDA'                         =>   1.00,
6045
    'cxpert'                       =>   1.63,
6046
    'cygnet'                       =>   4.21,
6047
    'D'                            =>   1.70,
6048
    'DAL'                          =>   1.50,
6049
    'Dart'                         =>   2.00,
6050
    'data base default'            =>   2.00,
6051
    'dataflex'                     =>   2.00,
6052
    'datatrieve'                   =>   4.00,
6053
    'dbase iii'                    =>   2.00,
6054
    'dbase iv'                     =>   1.54,
6055
    'dcl'                          =>   0.38,
6056
    'diff'                         =>   1.00,
6057
    'decision support default'     =>   2.22,
6058
    'decrally'                     =>   2.00,
6059
    'delphi'                       =>   2.76,
6060
    'DITA'                         =>   1.90,
6061
    'dl/1'                         =>   2.00,
6062
    'dtrace'                       =>   2.00,
6063
    'NASTRAN DMAP'                 =>   2.35,
6064
    'dna4'                         =>   4.21,
6065
    'DOORS Extension Language'     =>   1.50,
6066
    'DOS Batch'                    =>   0.63,
6067
    'dsp assembly'                 =>   0.50,
6068
    'dtabl'                        =>   1.74,
6069
    'dtipt'                        =>   1.74,
6070
    'dyana'                        =>   1.13,
6071
    'dynamoiii'                    =>   1.74,
6072
    'easel'                        =>   2.76,
6073
    'easy'                         =>   1.63,
6074
    'easytrieve+'                  =>   2.35,
6075
    'eclipse'                      =>   1.63,
6076
    'ECPP'                         =>   1.90,
6077
    'eda/sql'                      =>   6.67,
6078
    'edscheme 3.4'                 =>   1.51,
6079
    'eiffel'                       =>   3.81,
6080
    'Elixir'                       =>   2.11,
6081
    'enform'                       =>   1.74,
6082
    'englishbased default'         =>   1.51,
6083
    'ensemble'                     =>   2.76,
6084
    'epos'                         =>   4.00,
6085
    'ERB'                          =>   2.00,
6086
    'Erlang'                       =>   2.11,
6087
    'esf'                          =>   2.00,
6088
    'espadvisor'                   =>   1.63,
6089
    'espl/i'                       =>   1.13,
6090
    'euclid'                       =>   0.75,
6091
    'excel'                        =>   1.74,
6092
    'excel 12'                     =>  13.33,
6093
    'excel 34'                     =>  13.33,
6094
    'excel 5'                      =>  13.33,
6095
    'express'                      =>   2.22,
6096
    'exsys'                        =>   1.63,
6097
    'extended common lisp'         =>   1.43,
6098
    'eznomad'                      =>   2.22,
6099
    'facets'                       =>   4.00,
6100
    'factorylink iv'               =>   2.76,
6101
    'fame'                         =>   2.22,
6102
    'filemaker pro'                =>   2.22,
6103
    'flavors'                      =>   2.76,
6104
    'flex'                         =>   1.74,
6105
    'flexgen'                      =>   2.76,
6106
    'Focus'                        =>   1.90,
6107
    'foil'                         =>   1.51,
6108
    'forte'                        =>   4.44,
6109
    'forth'                        =>   1.25,
6110
    'Fortran 66'                   =>   0.63,
6111
    'Fortran 77'                   =>   0.75,
6112
    'Fortran 90'                   =>   1.00,
6113
    'Fortran 95'                   =>   1.13,
6114
    'Fortran II'                   =>   0.63,
6115
    'foundation'                   =>   2.76,
6116
    'foxpro'                       =>   2.29,
6117
    'foxpro 1'                     =>   2.00,
6118
    'foxpro 2.5'                   =>   2.35,
6119
    'framework'                    =>  13.33,
6120
    'F#'                           =>   2.50,
6121
    'g2'                           =>   1.63,
6122
    'gamma'                        =>   5.00,
6123
    'genascript'                   =>   2.96,
6124
    'gener/ol'                     =>   6.15,
6125
    'genexus'                      =>   5.33,
6126
    'genifer'                      =>   4.21,
6127
    'geode 2.0'                    =>   5.00,
6128
    'gfa basic'                    =>   2.35,
6129
    'gml'                          =>   1.74,
6130
    'golden common lisp'           =>   1.25,
6131
    'gpss'                         =>   1.74,
6132
    'guest'                        =>   2.86,
6133
    'guru'                         =>   1.63,
6134
    'Go'                           =>   2.50,
6135
    'Grails'                       =>   1.48,
6136
    'Groovy'                       =>   4.10,
6137
    'gw basic'                     =>   0.82,
6138
    'Harbour'                      =>   2.00,
6139
    'Haskell'                      =>   2.11,
6140
    'high c'                       =>   0.63,
6141
    'hlevel'                       =>   1.38,
6142
    'hp basic'                     =>   0.63,
6143
    'Haml'                         =>   2.50,
6144
    'Handlebars'                   =>   2.50,
6145
    'HTML'                         =>   1.90,
6146
    'XML'                          =>   1.90,
6147
    'MXML'                         =>   1.90,
6148
    'XSLT'                         =>   1.90,
6149
    'DTD'                          =>   1.90,
6150
    'XSD'                          =>   1.90,
6151
    'NAnt script'                  =>   1.90,
6152
    'MSBuild script'               =>   1.90, 
6153
    'HLSL'                         =>   2.00,
6154
    'HTML 2'                       =>   5.00,
6155
    'HTML 3'                       =>   5.33,
6156
    'huron'                        =>   5.00,
6157
    'ibm adf i'                    =>   4.00,
6158
    'ibm adf ii'                   =>   4.44,
6159
    'ibm advanced basic'           =>   0.82,
6160
    'ibm cics/vs'                  =>   2.00,
6161
    'ibm compiled basic'           =>   0.88,
6162
    'ibm vs cobol'                 =>   0.75,
6163
    'ibm vs cobol ii'              =>   0.88,
6164
    'ices'                         =>   1.13,
6165
    'icon'                         =>   1.00,
6166
    'ideal'                        =>   1.54,
6167
    'idms'                         =>   2.00,
6168
    'ief'                          =>   5.71,
6169
    'ief/cool:gen'                 =>   2.58,
6170
    'iew'                          =>   5.71,
6171
    'ifps/plus'                    =>   2.50,
6172
    'imprs'                        =>   2.00,
6173
    'informix'                     =>   2.58,
6174
    'ingres'                       =>   2.00,
6175
    'inquire'                      =>   6.15,
6176
    'insight2'                     =>   1.63,
6177
    'install/1'                    =>   5.00,
6178
    'InstallShield'                =>   1.90,
6179
    'intellect'                    =>   1.51,
6180
    'interlisp'                    =>   1.38,
6181
    'interpreted basic'            =>   0.75,
6182
    'interpreted c'                =>   0.63,
6183
    'iqlisp'                       =>   1.38,
6184
    'iqrp'                         =>   6.15,
6185
    'j2ee'                         =>   1.60,
6186
    'janus'                        =>   1.13,
6187
    'Java'                         =>   1.36,
6188
    'Javascript'                   =>   1.48,
6189
    'JavaServer Faces'             =>   1.5 ,
6190
    'JSON'                         =>   2.50,
6191
    'JSP'                          =>   1.48,
6192
    'Velocity Template Language'   =>   1.00,
6193
    'JCL'                          =>   1.67,
6194
    'joss'                         =>   0.75,
6195
    'jovial'                       =>   0.75,
6196
    'jsp'                          =>   1.36,
6197
    'kappa'                        =>   2.00,
6198
    'kbms'                         =>   1.63,
6199
    'kcl'                          =>   1.25,
6200
    'kee'                          =>   1.63,
6201
    'keyplus'                      =>   2.00,
6202
    'kl'                           =>   1.25,
6203
    'klo'                          =>   1.25,
6204
    'knowol'                       =>   1.63,
6205
    'krl'                          =>   1.38,
6206
    'Kermit'                       =>   2.00,
6207
    'Korn Shell'                   =>   3.81,
6208
    'Kotlin'                       =>   2.00,
6209
    'ladder logic'                 =>   2.22,
6210
    'lambit/l'                     =>   1.25,
6211
    'lattice c'                    =>   0.63,
6212
    'LESS'                         =>   1.50,
6213
    'liana'                        =>   0.63,
6214
    'lilith'                       =>   1.13,
6215
    'linc ii'                      =>   5.71,
6216
    'Lisp'                         =>   1.25,
6217
    'LiveLink OScript'             =>   3.5 ,
6218
    'loglisp'                      =>   1.38,
6219
    'loops'                        =>   3.81,
6220
    'lotus 123 dos'                =>  13.33,
6221
    'lotus macros'                 =>   0.75,
6222
    'lotus notes'                  =>   3.64,
6223
    'lucid 3d'                     =>  13.33,
6224
    'lyric'                        =>   1.51,
6225
    'm4'                           =>   1.00,
6226
    'm'                            =>   5.00,
6227
    'macforth'                     =>   1.25,
6228
    'mach1'                        =>   2.00,
6229
    'machine language'             =>   0.13,
6230
    'maestro'                      =>   5.00,
6231
    'magec'                        =>   5.00,
6232
    'magik'                        =>   3.81,
6233
    'Lake'                         =>   3.81,
6234
    'make'                         =>   2.50,
6235
    'mantis'                       =>   2.96,
6236
    'mapper'                       =>   0.99,
6237
    'mark iv'                      =>   2.00,
6238
    'mark v'                       =>   2.22,
6239
    'mathcad'                      =>  16.00,
6240
    'Maven'                        =>   1.90,
6241
    'mdl'                          =>   2.22,
6242
    'mentor'                       =>   1.51,
6243
    'mesa'                         =>   0.75,
6244
    'microfocus cobol'             =>   1.00,
6245
    'microforth'                   =>   1.25,
6246
    'microsoft c'                  =>   0.63,
6247
    'microstep'                    =>   4.00,
6248
    'miranda'                      =>   2.00,
6249
    'model 204'                    =>   2.11,
6250
    'modula 2'                     =>   1.00,
6251
    'mosaic'                       =>  13.33,
6252
    # 'ms c ++ v. 7'                 =>   1.51,
6253
    'ms compiled basic'            =>   0.88,
6254
    'msl'                          =>   1.25,
6255
    'mulisp'                       =>   1.25,
6256
    'MUMPS'                        =>   4.21,
6257
    'Mustache'                     =>   1.75,
6258
    'Nastran'                      =>   1.13,
6259
    'natural'                      =>   1.54,
6260
    'natural 1'                    =>   1.51,
6261
    'natural 2'                    =>   1.74,
6262
    'natural construct'            =>   3.20,
6263
    'natural language'             =>   0.03,
6264
    'netron/cap'                   =>   4.21,
6265
    'nexpert'                      =>   1.63,
6266
    'nial'                         =>   1.63,
6267
    'nomad2'                       =>   2.00,
6268
    'nonprocedural default'        =>   2.22,
6269
    'notes vip'                    =>   2.22,
6270
    'nroff'                        =>   1.51,
6271
    'object assembler'             =>   1.25,
6272
    'object lisp'                  =>   2.76,
6273
    'object logo'                  =>   2.76,
6274
    'object pascal'                =>   2.76,
6275
    'object star'                  =>   5.00,
6276
    'Objective C'                  =>   2.96,
6277
    'Objective C++'                =>   2.96,
6278
    'objectoriented default'       =>   2.76,
6279
    'objectview'                   =>   3.20,
6280
    'OCaml'                        =>   3.00,
6281
    'ogl'                          =>   1.00,
6282
    'omnis 7'                      =>   2.00,
6283
    'oodl'                         =>   2.76,
6284
    'ops'                          =>   1.74,
6285
    'ops5'                         =>   1.38,
6286
    'oracle'                       =>   2.76,
6287
    'Oracle Reports'               =>   2.76,
6288
    'Oracle Forms'                 =>   2.67,
6289
    'Oracle Developer/2000'        =>   3.48,
6290
    'oscar'                        =>   0.75,
6291
    'pacbase'                      =>   1.67,
6292
    'pace'                         =>   2.00,
6293
    'paradox/pal'                  =>   2.22,
6294
    'Pascal'                       =>   0.88,
6295
    'Patran Command Language'      =>   2.50,
6296
    'pc focus'                     =>   2.22,
6297
    'pdl millenium'                =>   3.81,
6298
    'pdp11 ade'                    =>   1.51,
6299
    'peoplesoft'                   =>   2.50,
6300
    'Perl'                         =>   4.00,
6301
    'persistance object builder'   =>   3.81,
6302
    'Pig Latin'                    =>   1.00,
6303
    'pilot'                        =>   1.51,
6304
    'PL/I'                         =>   1.38,
6305
    'pl/1'                         =>   1.38,
6306
    'pl/m'                         =>   1.13,
6307
    'pl/s'                         =>   0.88,
6308
    'pl/sql'                       =>   2.58,
6309
    'planit'                       =>   1.51,
6310
    'planner'                      =>   1.25,
6311
    'planperfect 1'                =>  11.43,
6312
    'plato'                        =>   1.51,
6313
    'polyforth'                    =>   1.25,
6314
    'pop'                          =>   1.38,
6315
    'poplog'                       =>   1.38,
6316
    'power basic'                  =>   1.63,
6317
    'powerbuilder'                 =>   3.33,
6318
    'powerhouse'                   =>   5.71,
6319
    'PowerShell'                   =>   3.00,
6320
    'ppl (plus)'                   =>   2.00,
6321
    'problemoriented default'      =>   1.13,
6322
    'proc'                         =>   2.96,
6323
    'procedural default'           =>   0.75,
6324
    'professional pascal'          =>   0.88,
6325
    'program generator default'    =>   5.00,
6326
    'progress v4'                  =>   2.22,
6327
    'proiv'                        =>   1.38,
6328
    'Prolog'                       =>   1.25,
6329
    'prose'                        =>   0.75,
6330
    'proteus'                      =>   0.75,
6331
    'Protocol Buffers'             =>   2.00,
6332
    'Puppet'                       =>   2.00,
6333
    'PureScript'                   =>   2.00,
6334
    'qbasic'                       =>   1.38,
6335
    'qbe'                          =>   6.15,
6336
    'qmf'                          =>   5.33,
6337
    'QML'                          =>   1.25,
6338
    'Qt Project'                   =>   1.00,
6339
    'qnial'                        =>   1.63,
6340
    'quattro'                      =>  13.33,
6341
    'quattro pro'                  =>  13.33,
6342
    'query default'                =>   6.15,
6343
    'quick basic 1'                =>   1.25,
6344
    'quick basic 2'                =>   1.31,
6345
    'quick basic 3'                =>   1.38,
6346
    'quick c'                      =>   0.63,
6347
    'quickbuild'                   =>   2.86,
6348
    'quiz'                         =>   5.33,
6349
    'R'                            =>   3.00,
6350
    'Racket'                       =>   1.50,
6351
    'rally'                        =>   2.00,
6352
    'ramis ii'                     =>   2.00,
6353
    'rapidgen'                     =>   2.86,
6354
    'ratfor'                       =>   0.88,
6355
    'rdb'                          =>   2.00,
6356
    'realia'                       =>   1.74,
6357
    'realizer 1.0'                 =>   2.00,
6358
    'realizer 2.0'                 =>   2.22,
6359
    'relate/3000'                  =>   2.00,
6360
    'reuse default'                =>  16.00,
6361
    'Razor'                        =>   2.00,
6362
    'Rexx'                         =>   1.19,
6363
    'rm basic'                     =>   0.88,
6364
    'rm cobol'                     =>   0.75,
6365
    'rm fortran'                   =>   0.75,
6366
    'RobotFramework'               =>   2.50,
6367
    'rpg i'                        =>   1.00,
6368
    'rpg ii'                       =>   1.63,
6369
    'rpg iii'                      =>   1.63,
6370
    'rtexpert 1.4'                 =>   1.38,
6371
    'Rust'                         =>   1.00,
6372
    'sabretalk'                    =>   0.90,
6373
    'sail'                         =>   0.75,
6374
    'sapiens'                      =>   5.00,
6375
    'sas'                          =>   1.95,
6376
    'savvy'                        =>   6.15,
6377
    'sbasic'                       =>   0.88,
6378
    'Scala'                        =>   4.10,
6379
    'sceptre'                      =>   1.13,
6380
    'scheme'                       =>   1.51,
6381
    'screen painter default'       =>  13.33,
6382
    'sequal'                       =>   6.67,
6383
    'Bourne Shell'                 =>   3.81,
6384
    'Bourne Again Shell'           =>   3.81,
6385
    'ksh'                          =>   3.81,
6386
    'C Shell'                      =>   3.81,
6387
    'siebel tools '                =>   6.15,
6388
    'SAS'                          =>   1.5 ,
6389
    'SASS'                         =>   1.5 ,
6390
    'simplan'                      =>   2.22,
6391
    'simscript'                    =>   1.74,
6392
    'simula'                       =>   1.74,
6393
    'simula 67'                    =>   1.74,
6394
    'simulation default'           =>   1.74,
6395
    'SKILL'                        =>   2.00,
6396
    'SKILL++'                      =>   2.00,
6397
    'slogan'                       =>   0.98,
6398
    'smalltalk'                    =>   2.50,
6399
    'smalltalk 286'                =>   3.81,
6400
    'smalltalk 80'                 =>   3.81,
6401
    'smalltalk/v'                  =>   3.81,
6402
    'Smarty'                       =>   3.50,
6403
    'snap'                         =>   1.00,
6404
    'snobol24'                     =>   0.63,
6405
    'softscreen'                   =>   5.71,
6406
    'Softbridge Basic'             =>   2.76,
6407
    'solo'                         =>   1.38,
6408
    'speakeasy'                    =>   2.22,
6409
    'spinnaker ppl'                =>   2.22,
6410
    'splus'                        =>   2.50,
6411
    'spreadsheet default'          =>  13.33,
6412
    'sps'                          =>   0.25,
6413
    'spss'                         =>   2.50,
6414
    'SQL'                          =>   2.29,
6415
    'sqlwindows'                   =>   6.67,
6416
    'statistical default'          =>   2.50,
6417
    'Standard ML'                  =>   3.00,
6418
    'strategem'                    =>   2.22,
6419
    'stress'                       =>   1.13,
6420
    'strongly typed default'       =>   0.88,
6421
    'style'                        =>   1.74,
6422
    'superbase 1.3'                =>   2.22,
6423
    'surpass'                      =>  13.33,
6424
    'Swift'                        =>   2.50,
6425
    'sybase'                       =>   2.00,
6426
    'symantec c++'                 =>   2.76,
6427
    'symbolang'                    =>   1.25,
6428
    'synchroworks'                 =>   4.44,
6429
    'synon/2e'                     =>   4.21,
6430
    'systemw'                      =>   2.22,
6431
    'tandem access language'       =>   0.88,
6432
    'Tcl/Tk'                       =>   4.00,
6433
    'Teamcenter def'               =>   1.00,
6434
    'Teamcenter met'               =>   1.00,
6435
    'Teamcenter mth'               =>   1.00,
6436
    'telon'                        =>   5.00,
6437
    'tessaract'                    =>   2.00,
6438
    'the twin'                     =>  13.33,
6439
    'themis'                       =>   6.15,
6440
    'tiief'                        =>   5.71,
6441
    'Titanium Style Sheet'         =>   2.00,
6442
    'topspeed c++'                 =>   2.76,
6443
    'transform'                    =>   5.33,
6444
    'translisp plus'               =>   1.43,
6445
    'treet'                        =>   1.25,
6446
    'treetran'                     =>   1.25,
6447
    'trs80 basic'                  =>   0.63,
6448
    'true basic'                   =>   1.25,
6449
    'turbo c'                      =>   0.63,
6450
    'turbo expert'                 =>   1.63,
6451
    'turbo pascal >5'              =>   1.63,
6452
    'turbo pascal 14'              =>   1.00,
6453
    'turbo pascal 45'              =>   1.13,
6454
    'turing'                       =>   1.00,
6455
    'tutor'                        =>   1.51,
6456
    'twaice'                       =>   1.63,
6457
    'TypeScript'                   =>   2.00,
6458
    'ucsd pascal'                  =>   0.88,
6459
    'ufo/ims'                      =>   2.22,
6460
    'uhelp'                        =>   2.50,
6461
    'uniface'                      =>   5.00,
6462
    'Unity-Prefab'                 =>   2.50,
6463
    'Vala'                         =>   1.50,
6464
    'Vala Header'                  =>   1.40,
6465
    'vax acms'                     =>   1.38,
6466
    'vax ade'                      =>   2.00,
6467
    'vbscript'                     =>   2.35,
6468
    'vectran'                      =>   0.75,
6469
    'Verilog-SystemVerilog'        =>   1.51,
6470
    'VHDL'                         =>   4.21,
6471
    'vim script'                   =>   3.00,
6472
    'visible c'                    =>   1.63,
6473
    'visible cobol'                =>   2.00,
6474
    'visicalc 1'                   =>   8.89,
6475
    'visual 4.0'                   =>   2.76,
6476
    'visual basic'                 =>   1.90,
6477
    'visual basic 1'               =>   1.74,
6478
    'visual basic 2'               =>   1.86,
6479
    'visual basic 3'               =>   2.00,
6480
    'visual basic 4'               =>   2.22,
6481
    'visual basic 5'               =>   2.76,
6482
    'Visual Basic'                 =>   2.76,
6483
    'visual basic dos'             =>   2.00,
6484
    'visual c++'                   =>   2.35,
6485
    'visual cobol'                 =>   4.00,
6486
    'Visual Fox Pro'               =>   4.00, # Visual Fox Pro is not available in the language gearing ratios listed at Mayes Consulting web site
6487
    'visual objects'               =>   5.00,
6488
    'visualage'                    =>   3.81,
6489
    'Visualforce Component'        =>   1.9 ,
6490
    'Visualforce Page'             =>   1.9 ,
6491
    'visualgen'                    =>   4.44,
6492
    'VM'                           =>   2.00,
6493
    'vpf'                          =>   0.84,
6494
    'vulcan'                       =>   1.25,
6495
    'vz programmer'                =>   2.22,
6496
    'warp x'                       =>   2.00,
6497
    'watcom c'                     =>   0.63,
6498
    'watcom c/386'                 =>   0.63,
6499
    'waterloo c'                   =>   0.63,
6500
    'waterloo pascal'              =>   0.88,
6501
    'watfiv'                       =>   0.94,
6502
    'watfor'                       =>   0.88,
6503
    'web scripts'                  =>   5.33,
6504
    'whip'                         =>   0.88,
6505
    'Windows Message File'         =>   1.00,
6506
    'Windows Resource File'        =>   1.00,
6507
    'Windows Module Definition'    =>   1.00,
6508
    'WiX source'                   =>   1.90,
6509
    'WiX include'                  =>   1.90,
6510
    'WiX string localization'      =>   1.90,
6511
    'wizard'                       =>   2.86,
6512
    'xBase'                        =>   2.00,
6513
    'xBase Header'                 =>   2.00,
6514
    'xlisp'                        =>   1.25,
6515
    'XAML'                         =>   1.90,
6516
    'XQuery'                       =>   2.50,
6517
    'yacc'                         =>   1.51,
6518
    'yacc++'                       =>   1.51,
6519
    'YAML'                         =>   0.90,
6520
    'zbasic'                       =>   0.88,
6521
    'zim'                          =>   4.21,
6522
    'zlisp'                        =>   1.25,
6523
    'Expect'                       => 2.00,
6524
    'C/C++ Header'                 => 1.00, 
6525
    'inc'                          => 1.00,
6526
    'lex'                          => 1.00,
6527
    'Julia'                        => 4.00,
6528
    'MATLAB'                       => 4.00,
6529
    'Mercury'                      => 3.00,
6530
    'Maven/XML'                    => 2.5,
6531
    'IDL'                          => 3.80,
6532
    'Octave'                       => 4.00,
6533
    'ML'                           => 3.00,
6534
    'Modula3'                      => 2.00,
6535
    'PHP'                          => 3.50,
6536
    'Python'                       => 4.20,
6537
    'Cython'                       => 3.80,
6538
    'Ruby'                         => 4.20,
6539
    'Ruby HTML'                    => 4.00,
6540
    'sed'                          => 4.00,
6541
    'Lua'                          => 4.00,
6542
    'OpenCL'                       => 1.50,
6543
#   'Lisp/Julia'                   => 4.00,
6544
#   'Lisp/OpenCL'                  => 1.50,
6545
#   'MATLAB/Objective C/MUMPS/Mercury' => 3.00,
6546
);
6547
# 1}}}
6548
%{$rh_Known_Binary_Archives} = (             # {{{1
6549
            '.tar'     => 1 ,
6550
            '.tar.Z'   => 1 ,
6551
            '.tar.gz'  => 1 ,
6552
            '.tar.bz2' => 1 ,
6553
            '.zip'     => 1 ,
6554
            '.Zip'     => 1 ,
6555
            '.ZIP'     => 1 ,
6556
            '.ear'     => 1 ,  # Java
6557
            '.war'     => 1 ,  # contained within .ear
6558
            '.xz'      => 1 ,
6559
            );
6560
# 1}}}
6561
} # end sub set_constants()
6562
sub check_scale_existence {                  # {{{1
6563
    # do a few sanity checks
6564
    my ($rhaa_Filters_by_Language, 
6565
        $rh_Language_by_Extension,
6566
        $rh_Scale_Factor) = @_;
6567
 
6568
    my %extension_collisions = (
6569
        # TODO:  find a better way of dealing with these
6570
        "PHP/Pascal"                        => 1,
6571
        "Lisp/OpenCL"                       => 1,
6572
        "Lisp/Julia"                        => 1,
6573
        "MATLAB/Objective C/MUMPS/Mercury"  => 1,
6574
        "Pascal/Puppet"                     => 1,
6575
        "Perl/Prolog"                       => 1,
6576
        "IDL/Qt Project/Prolog"             => 1,
6577
        "D/dtrace"                          => 1,
6578
    );
6579
    my $OK = 1;
6580
    foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
6581
        next if defined $extension_collisions{$language};
6582
        if (!defined $rh_Scale_Factor->{$language}) {
6583
            $OK = 0;
6584
            warn "Missing scale factor for $language\n";
6585
        }
6586
    }
6587
 
6588
    my %seen_it = ();
6589
    foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
6590
        my $language = $rh_Language_by_Extension->{$ext};
6591
        next if defined $extension_collisions{$language};
6592
        next if $seen_it{$language};
6593
        if (!@{$rhaa_Filters_by_Language->{$language}}) {
6594
            $OK = 0;
6595
            warn "Missing language filter for $language\n";
6596
        }
6597
        $seen_it{$language} = 1;
6598
    }
6599
    die unless $OK;
6600
} # 1}}}
6601
sub Install_Regexp_Common {                  # {{{1
6602
    # Installs portions of Damian Conway's & Abigail's Regexp::Common
6603
    # module, v2.120, into a temporary directory for the duration of
6604
    # this run.
6605
 
6606
    my %Regexp_Common_Contents = ();
6607
$Regexp_Common_Contents{'Common'} = <<'EOCommon'; # {{{2
6608
package Regexp::Common;
6609
 
6610
use 5.00473;
6611
use strict;
6612
 
6613
local $^W = 1;
6614
 
6615
use vars qw /$VERSION %RE %sub_interface $AUTOLOAD/;
6616
 
6617
($VERSION) = q $Revision: 2.120 $ =~ /([\d.]+)/;
6618
 
6619
 
6620
sub _croak {
6621
    require Carp;
6622
    goto &Carp::croak;
6623
}
6624
 
6625
sub _carp {
6626
    require Carp;
6627
    goto &Carp::carp;
6628
}
6629
 
6630
sub new {
6631
    my ($class, @data) = @_;
6632
    my %self;
6633
    tie %self, $class, @data;
6634
    return \%self;
6635
}
6636
 
6637
sub TIEHASH {
6638
    my ($class, @data) = @_;
6639
    bless \@data, $class;
6640
}
6641
 
6642
sub FETCH {
6643
    my ($self, $extra) = @_;
6644
    return bless ref($self)->new(@$self, $extra), ref($self);
6645
}
6646
 
6647
# Modification for cloc:  only need a few modules from Regexp::Common.
6648
my %imports = map {$_ => "Regexp::Common::$_"}
6649
              qw /balanced comment delimited /;
6650
#my %imports = map {$_ => "Regexp::Common::$_"}
6651
#              qw /balanced CC     comment   delimited lingua list
6652
#                  net      number profanity SEN       URI    whitespace
6653
#                  zip/;
6654
 
6655
sub import {
6656
    shift;  # Shift off the class.
6657
    tie %RE, __PACKAGE__;
6658
    {
6659
        no strict 'refs';
6660
        *{caller() . "::RE"} = \%RE;
6661
    }
6662
 
6663
    my $saw_import;
6664
    my $no_defaults;
6665
    my %exclude;
6666
    foreach my $entry (grep {!/^RE_/} @_) {
6667
        if ($entry eq 'pattern') {
6668
            no strict 'refs';
6669
            *{caller() . "::pattern"} = \&pattern;
6670
            next;
6671
        }
6672
        # This used to prevent $; from being set. We still recognize it,
6673
        # but we won't do anything.
6674
        if ($entry eq 'clean') {
6675
            next;
6676
        }
6677
        if ($entry eq 'no_defaults') {
6678
            $no_defaults ++;
6679
            next;
6680
        }
6681
        if (my $module = $imports {$entry}) {
6682
            $saw_import ++;
6683
            eval "require $module;";
6684
            die $@ if $@;
6685
            next;
6686
        }
6687
        if ($entry =~ /^!(.*)/ && $imports {$1}) {
6688
            $exclude {$1} ++;
6689
            next;
6690
        }
6691
        # As a last resort, try to load the argument.
6692
        my $module = $entry =~ /^Regexp::Common/
6693
                            ? $entry
6694
                            : "Regexp::Common::" . $entry;
6695
        eval "require $module;";
6696
        die $@ if $@;
6697
    }
6698
 
6699
    unless ($saw_import || $no_defaults) {
6700
        foreach my $module (values %imports) {
6701
            next if $exclude {$module};
6702
            eval "require $module;";
6703
            die $@ if $@;
6704
        }
6705
    }
6706
 
6707
    my %exported;
6708
    foreach my $entry (grep {/^RE_/} @_) {
6709
        if ($entry =~ /^RE_(\w+_)?ALL$/) {
6710
            my $m  = defined $1 ? $1 : "";
6711
            my $re = qr /^RE_${m}.*$/;
6712
            while (my ($sub, $interface) = each %sub_interface) {
6713
                next if $exported {$sub};
6714
                next unless $sub =~ /$re/;
6715
                {
6716
                    no strict 'refs';
6717
                    *{caller() . "::$sub"} = $interface;
6718
                }
6719
                $exported {$sub} ++;
6720
            }
6721
        }
6722
        else {
6723
            next if $exported {$entry};
6724
            _croak "Can't export unknown subroutine &$entry"
6725
                unless $sub_interface {$entry};
6726
            {
6727
                no strict 'refs';
6728
                *{caller() . "::$entry"} = $sub_interface {$entry};
6729
            }
6730
            $exported {$entry} ++;
6731
        }
6732
    }
6733
}
6734
 
6735
sub AUTOLOAD { _croak "Can't $AUTOLOAD" }
6736
 
6737
sub DESTROY {}
6738
 
6739
my %cache;
6740
 
6741
my $fpat = qr/^(-\w+)/;
6742
 
6743
sub _decache {
6744
        my @args = @{tied %{$_[0]}};
6745
        my @nonflags = grep {!/$fpat/} @args;
6746
        my $cache = get_cache(@nonflags);
6747
        _croak "Can't create unknown regex: \$RE{"
6748
            . join("}{",@args) . "}"
6749
                unless exists $cache->{__VAL__};
6750
        _croak "Perl $] does not support the pattern "
6751
            . "\$RE{" . join("}{",@args)
6752
            . "}.\nYou need Perl $cache->{__VAL__}{version} or later"
6753
                unless ($cache->{__VAL__}{version}||0) <= $];
6754
        my %flags = ( %{$cache->{__VAL__}{default}},
6755
                      map { /$fpat\Q$;\E(.*)/ ? ($1 => $2)
6756
                          : /$fpat/           ? ($1 => undef)
6757
                          :                     ()
6758
                          } @args);
6759
        $cache->{__VAL__}->_clone_with(\@args, \%flags);
6760
}
6761
 
6762
use overload q{""} => \&_decache;
6763
 
6764
 
6765
sub get_cache {
6766
        my $cache = \%cache;
6767
        foreach (@_) {
6768
                $cache = $cache->{$_}
6769
                      || ($cache->{$_} = {});
6770
        }
6771
        return $cache;
6772
}
6773
 
6774
sub croak_version {
6775
        my ($entry, @args) = @_;
6776
}
6777
 
6778
sub pattern {
6779
        my %spec = @_;
6780
        _croak 'pattern() requires argument: name => [ @list ]'
6781
                unless $spec{name} && ref $spec{name} eq 'ARRAY';
6782
        _croak 'pattern() requires argument: create => $sub_ref_or_string'
6783
                unless $spec{create};
6784
 
6785
        if (ref $spec{create} ne "CODE") {
6786
                my $fixed_str = "$spec{create}";
6787
                $spec{create} = sub { $fixed_str }
6788
        }
6789
 
6790
        my @nonflags;
6791
        my %default;
6792
        foreach ( @{$spec{name}} ) {
6793
                if (/$fpat=(.*)/) {
6794
                        $default{$1} = $2;
6795
                }
6796
                elsif (/$fpat\s*$/) {
6797
                        $default{$1} = undef;
6798
                }
6799
                else {
6800
                        push @nonflags, $_;
6801
                }
6802
        }
6803
 
6804
        my $entry = get_cache(@nonflags);
6805
 
6806
        if ($entry->{__VAL__}) {
6807
                _carp "Overriding \$RE{"
6808
                   . join("}{",@nonflags)
6809
                   . "}";
6810
        }
6811
 
6812
        $entry->{__VAL__} = bless {
6813
                                create  => $spec{create},
6814
                                match   => $spec{match} || \&generic_match,
6815
                                subs    => $spec{subs}  || \&generic_subs,
6816
                                version => $spec{version},
6817
                                default => \%default,
6818
                            }, 'Regexp::Common::Entry';
6819
 
6820
        foreach (@nonflags) {s/\W/X/g}
6821
        my $subname = "RE_" . join ("_", @nonflags);
6822
        $sub_interface{$subname} = sub {
6823
                push @_ => undef if @_ % 2;
6824
                my %flags = @_;
6825
                my $pat = $spec{create}->($entry->{__VAL__},
6826
                               {%default, %flags}, \@nonflags);
6827
                if (exists $flags{-keep}) { $pat =~ s/\Q(?k:/(/g; }
6828
                else { $pat =~ s/\Q(?k:/(?:/g; }
6829
                return exists $flags {-i} ? qr /(?i:$pat)/ : qr/$pat/;
6830
        };
6831
 
6832
        return 1;
6833
}
6834
 
6835
sub generic_match {$_ [1] =~  /$_[0]/}
6836
sub generic_subs  {$_ [1] =~ s/$_[0]/$_[2]/}
6837
 
6838
sub matches {
6839
        my ($self, $str) = @_;
6840
        my $entry = $self -> _decache;
6841
        $entry -> {match} -> ($entry, $str);
6842
}
6843
 
6844
sub subs {
6845
        my ($self, $str, $newstr) = @_;
6846
        my $entry = $self -> _decache;
6847
        $entry -> {subs} -> ($entry, $str, $newstr);
6848
        return $str;
6849
}
6850
 
6851
 
6852
package Regexp::Common::Entry;
6853
# use Carp;
6854
 
6855
local $^W = 1;
6856
 
6857
use overload
6858
    q{""} => sub {
6859
        my ($self) = @_;
6860
        my $pat = $self->{create}->($self, $self->{flags}, $self->{args});
6861
        if (exists $self->{flags}{-keep}) {
6862
            $pat =~ s/\Q(?k:/(/g;
6863
        }
6864
        else {
6865
            $pat =~ s/\Q(?k:/(?:/g;
6866
        }
6867
        if (exists $self->{flags}{-i})   { $pat = "(?i)$pat" }
6868
        return $pat;
6869
    };
6870
 
6871
sub _clone_with {
6872
    my ($self, $args, $flags) = @_;
6873
    bless { %$self, args=>$args, flags=>$flags }, ref $self;
6874
}
6875
# 
6876
#    Copyright (c) 2001 - 2005, Damian Conway and Abigail. All Rights
6877
#  Reserved. This module is free software. It may be used, redistributed
6878
#      and/or modified under the terms of the Perl Artistic License
6879
#            (see http://www.perl.com/perl/misc/Artistic.html)
6880
EOCommon
6881
# 2}}}
6882
$Regexp_Common_Contents{'Common/comment'} = <<'EOC';   # {{{2
6883
# $Id: comment.pm,v 2.116 2005/03/16 00:00:02 abigail Exp $
6884
 
6885
package Regexp::Common::comment;
6886
 
6887
use strict;
6888
local $^W = 1;
6889
 
6890
use Regexp::Common qw /pattern clean no_defaults/;
6891
use vars qw /$VERSION/;
6892
 
6893
($VERSION) = q $Revision: 2.116 $ =~ /[\d.]+/g;
6894
 
6895
my @generic = (
6896
    {languages => [qw /ABC Forth/],
6897
     to_eol    => ['\\\\']},   # This is for just a *single* backslash.
6898
 
6899
    {languages => [qw /Ada Alan Eiffel lua/],
6900
     to_eol    => ['--']},
6901
 
6902
    {languages => [qw /Advisor/],
6903
     to_eol    => ['#|//']},
6904
 
6905
    {languages => [qw /Advsys CQL Lisp LOGO M MUMPS REBOL Scheme
6906
                       SMITH zonefile/],
6907
     to_eol    => [';']},
6908
 
6909
    {languages => ['Algol 60'],
6910
     from_to   => [[qw /comment ;/]]},
6911
 
6912
    {languages => [qw {ALPACA B C C-- LPC PL/I}],
6913
     from_to   => [[qw {/* */}]]},
6914
 
6915
    {languages => [qw /awk fvwm2 Icon mutt Perl Python QML R Ruby shell Tcl/],
6916
     to_eol    => ['#']},
6917
 
6918
    {languages => [[BASIC => 'mvEnterprise']],
6919
     to_eol    => ['[*!]|REM']},
6920
 
6921
    {languages => [qw /Befunge-98 Funge-98 Shelta/],
6922
     id        => [';']},
6923
 
6924
    {languages => ['beta-Juliet', 'Crystal Report', 'Portia'],
6925
     to_eol    => ['//']},
6926
 
6927
    {languages => ['BML'],
6928
     from_to   => [['<?_c', '_c?>']],
6929
    },
6930
 
6931
    {languages => [qw /C++/, 'C#', qw /Cg ECMAScript FPL Java JavaScript/],
6932
     to_eol    => ['//'],
6933
     from_to   => [[qw {/* */}]]},
6934
 
6935
    {languages => [qw /CLU LaTeX slrn TeX/],
6936
     to_eol    => ['%']},
6937
 
6938
    {languages => [qw /False/],
6939
     from_to   => [[qw !{ }!]]},
6940
 
6941
    {languages => [qw /Fortran/],
6942
     to_eol    => ['!']},
6943
 
6944
    {languages => [qw /Haifu/],
6945
     id        => [',']},
6946
 
6947
    {languages => [qw /ILLGOL/],
6948
     to_eol    => ['NB']},
6949
 
6950
    {languages => [qw /INTERCAL/],
6951
     to_eol    => [q{(?:(?:PLEASE(?:\s+DO)?|DO)\s+)?(?:NOT|N'T)}]},
6952
 
6953
    {languages => [qw /J/],
6954
     to_eol    => ['NB[.]']},
6955
 
6956
    {languages => [qw /Nickle/],
6957
     to_eol    => ['#'],
6958
     from_to   => [[qw {/* */}]]},
6959
 
6960
    {languages => [qw /Oberon/],
6961
     from_to   => [[qw /(* *)/]]},
6962
 
6963
    {languages => [[qw /Pascal Delphi/], [qw /Pascal Free/], [qw /Pascal GPC/]],
6964
     to_eol    => ['//'],
6965
     from_to   => [[qw !{ }!], [qw !(* *)!]]},
6966
 
6967
    {languages => [[qw /Pascal Workshop/]],
6968
     id        => [qw /"/],
6969
     from_to   => [[qw !{ }!], [qw !(* *)!], [qw !/* */!]]},
6970
 
6971
    {languages => [qw /PEARL/],
6972
     to_eol    => ['!'],
6973
     from_to   => [[qw {/* */}]]},
6974
 
6975
    {languages => [qw /PHP/],
6976
     to_eol    => ['#', '//'],
6977
     from_to   => [[qw {/* */}]]},
6978
 
6979
    {languages => [qw !PL/B!],
6980
     to_eol    => ['[.;]']},
6981
 
6982
    {languages => [qw !PL/SQL!],
6983
     to_eol    => ['--'],
6984
     from_to   => [[qw {/* */}]]},
6985
 
6986
    {languages => [qw /Q-BAL/],
6987
     to_eol    => ['`']},
6988
 
6989
    {languages => [qw /Smalltalk/],
6990
     id        => ['"']},
6991
 
6992
    {languages => [qw /SQL/],
6993
     to_eol    => ['-{2,}']},
6994
 
6995
    {languages => [qw /troff/],
6996
     to_eol    => ['\\\"']},
6997
 
6998
    {languages => [qw /vi/],
6999
     to_eol    => ['"']},
7000
 
7001
    {languages => [qw /*W/],
7002
     from_to   => [[qw {|| !!}]]},
7003
);
7004
 
7005
my @plain_or_nested = (
7006
   [Caml         =>  undef,       "(*"  => "*)"],
7007
   [Dylan        =>  "//",        "/*"  => "*/"],
7008
   [Haskell      =>  "-{2,}",     "{-"  => "-}"],
7009
   [Hugo         =>  "!(?!\\\\)", "!\\" => "\\!"],
7010
   [SLIDE        =>  "#",         "(*"  => "*)"],
7011
);
7012
 
7013
#
7014
# Helper subs.
7015
#
7016
 
7017
sub combine      {
7018
    local $_ = join "|", @_;
7019
    if (@_ > 1) {
7020
        s/\(\?k:/(?:/g;
7021
        $_ = "(?k:$_)";
7022
    }
7023
    $_
7024
}
7025
 
7026
sub to_eol  ($)  {"(?k:(?k:$_[0])(?k:[^\\n]*)(?k:\\n))"}
7027
sub id      ($)  {"(?k:(?k:$_[0])(?k:[^$_[0]]*)(?k:$_[0]))"}  # One char only!
7028
sub from_to      {
7029
    local $^W = 1;
7030
    my ($begin, $end) = @_;
7031
 
7032
    my $qb  = quotemeta $begin;
7033
    my $qe  = quotemeta $end;
7034
    my $fe  = quotemeta substr $end   => 0, 1;
7035
    my $te  = quotemeta substr $end   => 1;
7036
 
7037
    "(?k:(?k:$qb)(?k:(?:[^$fe]+|$fe(?!$te))*)(?k:$qe))";
7038
}
7039
 
7040
 
7041
my $count = 0;
7042
sub nested {
7043
    local $^W = 1;
7044
    my ($begin, $end) = @_;
7045
 
7046
    $count ++;
7047
    my $r = '(??{$Regexp::Common::comment ['. $count . ']})';
7048
 
7049
    my $qb  = quotemeta $begin;
7050
    my $qe  = quotemeta $end;
7051
    my $fb  = quotemeta substr $begin => 0, 1;
7052
    my $fe  = quotemeta substr $end   => 0, 1;
7053
 
7054
    my $tb  = quotemeta substr $begin => 1;
7055
    my $te  = quotemeta substr $end   => 1;
7056
 
7057
    use re 'eval';
7058
 
7059
    my $re;
7060
    if ($fb eq $fe) {
7061
        $re = qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
7062
    }
7063
    else {
7064
        local $"      =  "|";
7065
        my   @clauses =  "(?>[^$fb$fe]+)";
7066
        push @clauses => "$fb(?!$tb)" if length $tb;
7067
        push @clauses => "$fe(?!$te)" if length $te;
7068
        push @clauses =>  $r;
7069
        $re           =   qr /(?:$qb(?:@clauses)*$qe)/;
7070
    }
7071
 
7072
    $Regexp::Common::comment [$count] = qr/$re/;
7073
}
7074
 
7075
#
7076
# Process data.
7077
#
7078
 
7079
foreach my $info (@plain_or_nested) {
7080
    my ($language, $mark, $begin, $end) = @$info;
7081
    pattern name    => [comment => $language],
7082
            create  =>
7083
                sub {my $re     = nested $begin => $end;
7084
                     my $prefix = defined $mark ? $mark . "[^\n]*\n|" : "";
7085
                     exists $_ [1] -> {-keep} ? qr /($prefix$re)/
7086
                                              : qr  /$prefix$re/
7087
                },
7088
            version => 5.006,
7089
            ;
7090
}
7091
 
7092
 
7093
foreach my $group (@generic) {
7094
    my $pattern = combine +(map {to_eol   $_} @{$group -> {to_eol}}),
7095
                           (map {from_to @$_} @{$group -> {from_to}}),
7096
                           (map {id       $_} @{$group -> {id}}),
7097
                  ;
7098
    foreach my $language  (@{$group -> {languages}}) {
7099
        pattern name    => [comment => ref $language ? @$language : $language],
7100
                create  => $pattern,
7101
                ;
7102
    }
7103
}
7104
 
7105
 
7106
 
7107
#
7108
# Other languages.
7109
#
7110
 
7111
# http://www.pascal-central.com/docs/iso10206.txt
7112
pattern name    => [qw /comment Pascal/],
7113
        create  => '(?k:' . '(?k:[{]|[(][*])'
7114
                          . '(?k:[^}*]*(?:[*][^)][^}*]*)*)'
7115
                          . '(?k:[}]|[*][)])'
7116
                          . ')'
7117
        ;
7118
 
7119
# http://www.templetons.com/brad/alice/language/
7120
pattern name    =>  [qw /comment Pascal Alice/],
7121
        create  =>  '(?k:(?k:[{])(?k:[^}\n]*)(?k:[}]))'
7122
        ;
7123
 
7124
 
7125
# http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt
7126
pattern name    => [qw (comment), 'Algol 68'],
7127
        create  => q {(?k:(?:#[^#]*#)|}                           .
7128
                   q {(?:\bco\b(?:[^c]+|\Bc|\bc(?!o\b))*\bco\b)|} .
7129
                   q {(?:\bcomment\b(?:[^c]+|\Bc|\bc(?!omment\b))*\bcomment\b))}
7130
        ;
7131
 
7132
 
7133
# See rules 91 and 92 of ISO 8879 (SGML).
7134
# Charles F. Goldfarb: "The SGML Handbook".
7135
# Oxford: Oxford University Press. 1990. ISBN 0-19-853737-9.
7136
# Ch. 10.3, pp 390.
7137
pattern name    => [qw (comment HTML)],
7138
        create  => q {(?k:(?k:<!)(?k:(?:--(?k:[^-]*(?:-[^-]+)*)--\s*)*)(?k:>))},
7139
        ;
7140
 
7141
 
7142
pattern name    => [qw /comment SQL MySQL/],
7143
        create  => q {(?k:(?:#|-- )[^\n]*\n|} .
7144
                   q {/\*(?:(?>[^*;"']+)|"[^"]*"|'[^']*'|\*(?!/))*(?:;|\*/))},
7145
        ;
7146
 
7147
# Anything that isn't <>[]+-.,
7148
# http://home.wxs.nl/~faase009/Ha_BF.html
7149
pattern name    => [qw /comment Brainfuck/],
7150
        create  => '(?k:[^<>\[\]+\-.,]+)'
7151
        ;
7152
 
7153
# Squeak is a variant of Smalltalk-80.
7154
# http://www.squeak.
7155
# http://mucow.com/squeak-qref.html
7156
pattern name    => [qw /comment Squeak/],
7157
        create  => '(?k:(?k:")(?k:[^"]*(?:""[^"]*)*)(?k:"))'
7158
        ;
7159
 
7160
#
7161
# Scores of less than 5 or above 17....
7162
# http://www.cliff.biffle.org/esoterica/beatnik.html
7163
@Regexp::Common::comment::scores = (1,  3,  3,  2,  1,  4,  2,  4,  1,  8,
7164
                                    5,  1,  3,  1,  1,  3, 10,  1,  1,  1,
7165
                                    1,  4,  4,  8,  4, 10);
7166
pattern name    =>  [qw /comment Beatnik/],
7167
        create  =>  sub {
7168
            use re 'eval';
7169
            my ($s, $x);
7170
            my $re = qr {\b([A-Za-z]+)\b
7171
                         (?(?{($s, $x) = (0, lc $^N);
7172
                              $s += $Regexp::Common::comment::scores
7173
                                    [ord (chop $x) - ord ('a')] while length $x;
7174
                              $s  >= 5 && $s < 18})XXX|)}x;
7175
            $re;
7176
        },
7177
        version  => 5.008,
7178
        ;
7179
 
7180
 
7181
# http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/
7182
#  (Goto table of contents/3.3 Source Form)
7183
# Fortran, in fixed format. Comments start with a C, c or * in the first
7184
# column, or a ! anywhere, but the sixth column. Then end with a newline.
7185
pattern name    =>  [qw /comment Fortran fixed/],
7186
        create  =>  '(?k:(?k:(?:^[Cc*]|(?<!^.....)!))(?k:[^\n]*)(?k:\n))'
7187
        ;
7188
 
7189
 
7190
# http://www.csis.ul.ie/cobol/Course/COBOLIntro.htm
7191
# Traditionally, comments in COBOL were indicated with an asteriks in
7192
# the seventh column. Modern compilers may be more lenient.
7193
pattern name    =>  [qw /comment COBOL/],
7194
        create  =>  '(?<=^......)(?k:(?k:[*])(?k:[^\n]*)(?k:\n))',
7195
        version =>  '5.008',
7196
        ;
7197
 
7198
1;
7199
#
7200
#    Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
7201
#      This module is free software. It may be used, redistributed
7202
#     and/or modified under the terms of the Perl Artistic License
7203
#           (see http://www.perl.com/perl/misc/Artistic.html)
7204
EOC
7205
# 2}}}
7206
$Regexp_Common_Contents{'Common/balanced'} = <<'EOB';   # {{{2
7207
package Regexp::Common::balanced; {
7208
 
7209
use strict;
7210
local $^W = 1;
7211
 
7212
use vars qw /$VERSION/;
7213
($VERSION) = q $Revision: 2.101 $ =~ /[\d.]+/g;
7214
 
7215
use Regexp::Common qw /pattern clean no_defaults/;
7216
 
7217
my %closer = ( '{'=>'}', '('=>')', '['=>']', '<'=>'>' );
7218
my $count = -1;
7219
my %cache;
7220
 
7221
sub nested {
7222
    local $^W = 1;
7223
    my ($start, $finish) = @_;
7224
 
7225
    return $Regexp::Common::balanced [$cache {$start} {$finish}]
7226
            if exists $cache {$start} {$finish};
7227
 
7228
    $count ++;
7229
    my $r = '(??{$Regexp::Common::balanced ['. $count . ']})';
7230
 
7231
    my @starts   = map {s/\\(.)/$1/g; $_} grep {length}
7232
                        $start  =~ /([^|\\]+|\\.)+/gs;
7233
    my @finishes = map {s/\\(.)/$1/g; $_} grep {length}
7234
                        $finish =~ /([^|\\]+|\\.)+/gs;
7235
 
7236
    push @finishes => ($finishes [-1]) x (@starts - @finishes);
7237
 
7238
    my @re;
7239
    local $" = "|";
7240
    foreach my $begin (@starts) {
7241
        my $end = shift @finishes;
7242
 
7243
        my $qb  = quotemeta $begin;
7244
        my $qe  = quotemeta $end;
7245
        my $fb  = quotemeta substr $begin => 0, 1;
7246
        my $fe  = quotemeta substr $end   => 0, 1;
7247
 
7248
        my $tb  = quotemeta substr $begin => 1;
7249
        my $te  = quotemeta substr $end   => 1;
7250
 
7251
        use re 'eval';
7252
 
7253
        my $add;
7254
        if ($fb eq $fe) {
7255
            push @re =>
7256
                   qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
7257
        }
7258
        else {
7259
            my   @clauses =  "(?>[^$fb$fe]+)";
7260
            push @clauses => "$fb(?!$tb)" if length $tb;
7261
            push @clauses => "$fe(?!$te)" if length $te;
7262
            push @clauses =>  $r;
7263
            push @re      =>  qr /(?:$qb(?:@clauses)*$qe)/;
7264
        }
7265
    }
7266
 
7267
    $cache {$start} {$finish} = $count;
7268
    $Regexp::Common::balanced [$count] = qr/@re/;
7269
}
7270
 
7271
 
7272
pattern name    => [qw /balanced -parens=() -begin= -end=/],
7273
        create  => sub {
7274
            my $flag = $_[1];
7275
            unless (defined $flag -> {-begin} && length $flag -> {-begin} &&
7276
                    defined $flag -> {-end}   && length $flag -> {-end}) {
7277
                my @open  = grep {index ($flag->{-parens}, $_) >= 0}
7278
                             ('[','(','{','<');
7279
                my @close = map {$closer {$_}} @open;
7280
                $flag -> {-begin} = join "|" => @open;
7281
                $flag -> {-end}   = join "|" => @close;
7282
            }
7283
            my $pat = nested @$flag {qw /-begin -end/};
7284
            return exists $flag -> {-keep} ? qr /($pat)/ : $pat;
7285
        },
7286
        version => 5.006,
7287
        ;
7288
 
7289
}
7290
 
7291
1;
7292
#
7293
#     Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
7294
#       This module is free software. It may be used, redistributed
7295
#      and/or modified under the terms of the Perl Artistic License
7296
#            (see http://www.perl.com/perl/misc/Artistic.html)
7297
EOB
7298
# 2}}}
7299
$Regexp_Common_Contents{'Common/delimited'} = <<'EOD';   # {{{2
7300
# $Id: delimited.pm,v 2.104 2005/03/16 00:22:45 abigail Exp $
7301
 
7302
package Regexp::Common::delimited;
7303
 
7304
use strict;
7305
local $^W = 1;
7306
 
7307
use Regexp::Common qw /pattern clean no_defaults/;
7308
use vars qw /$VERSION/;
7309
 
7310
($VERSION) = q $Revision: 2.104 $ =~ /[\d.]+/g;
7311
 
7312
sub gen_delimited {
7313
 
7314
    my ($dels, $escs) = @_;
7315
    # return '(?:\S*)' unless $dels =~ /\S/;
7316
    if (length $escs) {
7317
        $escs .= substr ($escs, -1) x (length ($dels) - length ($escs));
7318
    }
7319
    my @pat = ();
7320
    my $i;
7321
    for ($i=0; $i < length $dels; $i++) {
7322
        my $del = quotemeta substr ($dels, $i, 1);
7323
        my $esc = length($escs) ? quotemeta substr ($escs, $i, 1) : "";
7324
        if ($del eq $esc) {
7325
            push @pat,
7326
                 "(?k:$del)(?k:[^$del]*(?:(?:$del$del)[^$del]*)*)(?k:$del)";
7327
        }
7328
        elsif (length $esc) {
7329
            push @pat,
7330
                 "(?k:$del)(?k:[^$esc$del]*(?:$esc.[^$esc$del]*)*)(?k:$del)";
7331
        }
7332
        else {
7333
            push @pat, "(?k:$del)(?k:[^$del]*)(?k:$del)";
7334
        }
7335
    }
7336
    my $pat = join '|', @pat;
7337
    return "(?k:$pat)";
7338
}
7339
 
7340
sub _croak {
7341
    require Carp;
7342
    goto &Carp::croak;
7343
}
7344
 
7345
pattern name   => [qw( delimited -delim= -esc=\\ )],
7346
        create => sub {my $flags = $_[1];
7347
                       _croak 'Must specify delimiter in $RE{delimited}'
7348
                             unless length $flags->{-delim};
7349
                       return gen_delimited (@{$flags}{-delim, -esc});
7350
                  },
7351
        ;
7352
 
7353
pattern name   => [qw( quoted -esc=\\ )],
7354
        create => sub {my $flags = $_[1];
7355
                       return gen_delimited (q{"'`}, $flags -> {-esc});
7356
                  },
7357
        ;
7358
 
7359
 
7360
1;
7361
#
7362
#     Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
7363
#       This module is free software. It may be used, redistributed
7364
#      and/or modified under the terms of the Perl Artistic License
7365
#            (see http://www.perl.com/perl/misc/Artistic.html)
7366
EOD
7367
# 2}}}
7368
    my $problems        = 0;
7369
    $HAVE_Rexexp_Common = 0;
7370
    my $dir             = "";
7371
    if ($opt_sdir) {
7372
        ++$TEMP_OFF;
7373
        $dir = "$opt_sdir/$TEMP_OFF";
7374
        File::Path::rmtree($dir) if     is_dir($dir);
7375
        File::Path::mkpath($dir) unless is_dir($dir);
7376
    } else {
7377
        # let File::Temp create a suitable temporary directory
7378
        $dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
7379
        $TEMP_INST{ $dir } = "Regexp::Common";
7380
    }
7381
    print "Using temp dir [$dir] to install Regexp::Common\n" if $opt_v;
7382
    my $Regexp_dir        = "$dir/Regexp";
7383
    my $Regexp_Common_dir = "$dir/Regexp/Common";
7384
    mkdir $Regexp_dir       ;
7385
    mkdir $Regexp_Common_dir;
7386
 
7387
    foreach my $module_file (keys %Regexp_Common_Contents) {
7388
        my $OUT = new IO::File "$dir/Regexp/${module_file}.pm", "w";
7389
        if (defined $OUT) {
7390
            print $OUT $Regexp_Common_Contents{$module_file};
7391
            $OUT->close;
7392
        } else {
7393
            warn "Failed to install Regexp::${module_file}.pm\n";
7394
            $problems = 1;
7395
        }
7396
    }
7397
 
7398
    push @INC, $dir;
7399
    eval "use Regexp::Common qw /comment RE_comment_HTML balanced/";
7400
    $HAVE_Rexexp_Common = 1 unless $problems;
7401
} # 1}}}
7402
sub Install_Algorithm_Diff {                 # {{{1
7403
    # Installs Tye McQueen's Algorithm::Diff module, v1.1902, into a 
7404
    # temporary directory for the duration of this run.
7405
 
7406
my $Algorithm_Diff_Contents = <<'EOAlgDiff'; # {{{2
7407
package Algorithm::Diff;
7408
# Skip to first "=head" line for documentation.
7409
use strict;
7410
 
7411
use integer;    # see below in _replaceNextLargerWith() for mod to make
7412
                # if you don't use this
7413
use vars qw( $VERSION @EXPORT_OK );
7414
$VERSION = 1.19_02;
7415
#          ^ ^^ ^^-- Incremented at will
7416
#          | \+----- Incremented for non-trivial changes to features
7417
#          \-------- Incremented for fundamental changes
7418
require Exporter;
7419
*import    = \&Exporter::import;
7420
@EXPORT_OK = qw(
7421
    prepare LCS LCSidx LCS_length
7422
    diff sdiff compact_diff
7423
    traverse_sequences traverse_balanced
7424
);
7425
 
7426
# McIlroy-Hunt diff algorithm
7427
# Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
7428
# by Ned Konz, perl@bike-nomad.com
7429
# Updates by Tye McQueen, http://perlmonks.org/?node=tye
7430
 
7431
# Create a hash that maps each element of $aCollection to the set of
7432
# positions it occupies in $aCollection, restricted to the elements
7433
# within the range of indexes specified by $start and $end.
7434
# The fourth parameter is a subroutine reference that will be called to
7435
# generate a string to use as a key.
7436
# Additional parameters, if any, will be passed to this subroutine.
7437
#
7438
# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen );
7439
 
7440
sub _withPositionsOfInInterval
7441
{
7442
    my $aCollection = shift;    # array ref
7443
    my $start       = shift;
7444
    my $end         = shift;
7445
    my $keyGen      = shift;
7446
    my %d;
7447
    my $index;
7448
    for ( $index = $start ; $index <= $end ; $index++ )
7449
    {
7450
        my $element = $aCollection->[$index];
7451
        my $key = &$keyGen( $element, @_ );
7452
        if ( exists( $d{$key} ) )
7453
        {
7454
            unshift ( @{ $d{$key} }, $index );
7455
        }
7456
        else
7457
        {
7458
            $d{$key} = [$index];
7459
        }
7460
    }
7461
    return wantarray ? %d : \%d;
7462
}
7463
 
7464
# Find the place at which aValue would normally be inserted into the
7465
# array. If that place is already occupied by aValue, do nothing, and
7466
# return undef. If the place does not exist (i.e., it is off the end of
7467
# the array), add it to the end, otherwise replace the element at that
7468
# point with aValue.  It is assumed that the array's values are numeric.
7469
# This is where the bulk (75%) of the time is spent in this module, so
7470
# try to make it fast!
7471
 
7472
sub _replaceNextLargerWith
7473
{
7474
    my ( $array, $aValue, $high ) = @_;
7475
    $high ||= $#$array;
7476
 
7477
    # off the end?
7478
    if ( $high == -1 || $aValue > $array->[-1] )
7479
    {
7480
        push ( @$array, $aValue );
7481
        return $high + 1;
7482
    }
7483
 
7484
    # binary search for insertion point...
7485
    my $low = 0;
7486
    my $index;
7487
    my $found;
7488
    while ( $low <= $high )
7489
    {
7490
        $index = ( $high + $low ) / 2;
7491
 
7492
        # $index = int(( $high + $low ) / 2);  # without 'use integer'
7493
        $found = $array->[$index];
7494
 
7495
        if ( $aValue == $found )
7496
        {
7497
            return undef;
7498
        }
7499
        elsif ( $aValue > $found )
7500
        {
7501
            $low = $index + 1;
7502
        }
7503
        else
7504
        {
7505
            $high = $index - 1;
7506
        }
7507
    }
7508
 
7509
    # now insertion point is in $low.
7510
    $array->[$low] = $aValue;    # overwrite next larger
7511
    return $low;
7512
}
7513
 
7514
# This method computes the longest common subsequence in $a and $b.
7515
 
7516
# Result is array or ref, whose contents is such that
7517
#   $a->[ $i ] == $b->[ $result[ $i ] ]
7518
# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.
7519
 
7520
# An additional argument may be passed; this is a hash or key generating
7521
# function that should return a string that uniquely identifies the given
7522
# element.  It should be the case that if the key is the same, the elements
7523
# will compare the same. If this parameter is undef or missing, the key
7524
# will be the element as a string.
7525
 
7526
# By default, comparisons will use "eq" and elements will be turned into keys
7527
# using the default stringizing operator '""'.
7528
 
7529
# Additional parameters, if any, will be passed to the key generation
7530
# routine.
7531
 
7532
sub _longestCommonSubsequence
7533
{
7534
    my $a        = shift;    # array ref or hash ref
7535
    my $b        = shift;    # array ref or hash ref
7536
    my $counting = shift;    # scalar
7537
    my $keyGen   = shift;    # code ref
7538
    my $compare;             # code ref
7539
 
7540
    if ( ref($a) eq 'HASH' )
7541
    {                        # prepared hash must be in $b
7542
        my $tmp = $b;
7543
        $b = $a;
7544
        $a = $tmp;
7545
    }
7546
 
7547
    # Check for bogus (non-ref) argument values
7548
    if ( !ref($a) || !ref($b) )
7549
    {
7550
        my @callerInfo = caller(1);
7551
        die 'error: must pass array or hash references to ' . $callerInfo[3];
7552
    }
7553
 
7554
    # set up code refs
7555
    # Note that these are optimized.
7556
    if ( !defined($keyGen) )    # optimize for strings
7557
    {
7558
        $keyGen = sub { $_[0] };
7559
        $compare = sub { my ( $a, $b ) = @_; $a eq $b };
7560
    }
7561
    else
7562
    {
7563
        $compare = sub {
7564
            my $a = shift;
7565
            my $b = shift;
7566
            &$keyGen( $a, @_ ) eq &$keyGen( $b, @_ );
7567
        };
7568
    }
7569
 
7570
    my ( $aStart, $aFinish, $matchVector ) = ( 0, $#$a, [] );
7571
    my ( $prunedCount, $bMatches ) = ( 0, {} );
7572
 
7573
    if ( ref($b) eq 'HASH' )    # was $bMatches prepared for us?
7574
    {
7575
        $bMatches = $b;
7576
    }
7577
    else
7578
    {
7579
        my ( $bStart, $bFinish ) = ( 0, $#$b );
7580
 
7581
        # First we prune off any common elements at the beginning
7582
        while ( $aStart <= $aFinish
7583
            and $bStart <= $bFinish
7584
            and &$compare( $a->[$aStart], $b->[$bStart], @_ ) )
7585
        {
7586
            $matchVector->[ $aStart++ ] = $bStart++;
7587
            $prunedCount++;
7588
        }
7589
 
7590
        # now the end
7591
        while ( $aStart <= $aFinish
7592
            and $bStart <= $bFinish
7593
            and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) )
7594
        {
7595
            $matchVector->[ $aFinish-- ] = $bFinish--;
7596
            $prunedCount++;
7597
        }
7598
 
7599
        # Now compute the equivalence classes of positions of elements
7600
        $bMatches =
7601
          _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ );
7602
    }
7603
    my $thresh = [];
7604
    my $links  = [];
7605
 
7606
    my ( $i, $ai, $j, $k );
7607
    for ( $i = $aStart ; $i <= $aFinish ; $i++ )
7608
    {
7609
        $ai = &$keyGen( $a->[$i], @_ );
7610
        if ( exists( $bMatches->{$ai} ) )
7611
        {
7612
            $k = 0;
7613
            for $j ( @{ $bMatches->{$ai} } )
7614
            {
7615
 
7616
                # optimization: most of the time this will be true
7617
                if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j )
7618
                {
7619
                    $thresh->[$k] = $j;
7620
                }
7621
                else
7622
                {
7623
                    $k = _replaceNextLargerWith( $thresh, $j, $k );
7624
                }
7625
 
7626
                # oddly, it's faster to always test this (CPU cache?).
7627
                if ( defined($k) )
7628
                {
7629
                    $links->[$k] =
7630
                      [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ];
7631
                }
7632
            }
7633
        }
7634
    }
7635
 
7636
    if (@$thresh)
7637
    {
7638
        return $prunedCount + @$thresh if $counting;
7639
        for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] )
7640
        {
7641
            $matchVector->[ $link->[1] ] = $link->[2];
7642
        }
7643
    }
7644
    elsif ($counting)
7645
    {
7646
        return $prunedCount;
7647
    }
7648
 
7649
    return wantarray ? @$matchVector : $matchVector;
7650
}
7651
 
7652
sub traverse_sequences
7653
{
7654
    my $a                 = shift;          # array ref
7655
    my $b                 = shift;          # array ref
7656
    my $callbacks         = shift || {};
7657
    my $keyGen            = shift;
7658
    my $matchCallback     = $callbacks->{'MATCH'} || sub { };
7659
    my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
7660
    my $finishedACallback = $callbacks->{'A_FINISHED'};
7661
    my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
7662
    my $finishedBCallback = $callbacks->{'B_FINISHED'};
7663
    my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
7664
 
7665
    # Process all the lines in @$matchVector
7666
    my $lastA = $#$a;
7667
    my $lastB = $#$b;
7668
    my $bi    = 0;
7669
    my $ai;
7670
 
7671
    for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ )
7672
    {
7673
        my $bLine = $matchVector->[$ai];
7674
        if ( defined($bLine) )    # matched
7675
        {
7676
            &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine;
7677
            &$matchCallback( $ai,    $bi++, @_ );
7678
        }
7679
        else
7680
        {
7681
            &$discardACallback( $ai, $bi, @_ );
7682
        }
7683
    }
7684
 
7685
    # The last entry (if any) processed was a match.
7686
    # $ai and $bi point just past the last matching lines in their sequences.
7687
 
7688
    while ( $ai <= $lastA or $bi <= $lastB )
7689
    {
7690
 
7691
        # last A?
7692
        if ( $ai == $lastA + 1 and $bi <= $lastB )
7693
        {
7694
            if ( defined($finishedACallback) )
7695
            {
7696
                &$finishedACallback( $lastA, @_ );
7697
                $finishedACallback = undef;
7698
            }
7699
            else
7700
            {
7701
                &$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB;
7702
            }
7703
        }
7704
 
7705
        # last B?
7706
        if ( $bi == $lastB + 1 and $ai <= $lastA )
7707
        {
7708
            if ( defined($finishedBCallback) )
7709
            {
7710
                &$finishedBCallback( $lastB, @_ );
7711
                $finishedBCallback = undef;
7712
            }
7713
            else
7714
            {
7715
                &$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA;
7716
            }
7717
        }
7718
 
7719
        &$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA;
7720
        &$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB;
7721
    }
7722
 
7723
    return 1;
7724
}
7725
 
7726
sub traverse_balanced
7727
{
7728
    my $a                 = shift;              # array ref
7729
    my $b                 = shift;              # array ref
7730
    my $callbacks         = shift || {};
7731
    my $keyGen            = shift;
7732
    my $matchCallback     = $callbacks->{'MATCH'} || sub { };
7733
    my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
7734
    my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
7735
    my $changeCallback    = $callbacks->{'CHANGE'};
7736
    my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
7737
 
7738
    # Process all the lines in match vector
7739
    my $lastA = $#$a;
7740
    my $lastB = $#$b;
7741
    my $bi    = 0;
7742
    my $ai    = 0;
7743
    my $ma    = -1;
7744
    my $mb;
7745
 
7746
    while (1)
7747
    {
7748
 
7749
        # Find next match indices $ma and $mb
7750
        do {
7751
            $ma++;
7752
        } while(
7753
                $ma <= $#$matchVector
7754
            &&  !defined $matchVector->[$ma]
7755
        );
7756
 
7757
        last if $ma > $#$matchVector;    # end of matchVector?
7758
        $mb = $matchVector->[$ma];
7759
 
7760
        # Proceed with discard a/b or change events until
7761
        # next match
7762
        while ( $ai < $ma || $bi < $mb )
7763
        {
7764
 
7765
            if ( $ai < $ma && $bi < $mb )
7766
            {
7767
 
7768
                # Change
7769
                if ( defined $changeCallback )
7770
                {
7771
                    &$changeCallback( $ai++, $bi++, @_ );
7772
                }
7773
                else
7774
                {
7775
                    &$discardACallback( $ai++, $bi, @_ );
7776
                    &$discardBCallback( $ai, $bi++, @_ );
7777
                }
7778
            }
7779
            elsif ( $ai < $ma )
7780
            {
7781
                &$discardACallback( $ai++, $bi, @_ );
7782
            }
7783
            else
7784
            {
7785
 
7786
                # $bi < $mb
7787
                &$discardBCallback( $ai, $bi++, @_ );
7788
            }
7789
        }
7790
 
7791
        # Match
7792
        &$matchCallback( $ai++, $bi++, @_ );
7793
    }
7794
 
7795
    while ( $ai <= $lastA || $bi <= $lastB )
7796
    {
7797
        if ( $ai <= $lastA && $bi <= $lastB )
7798
        {
7799
 
7800
            # Change
7801
            if ( defined $changeCallback )
7802
            {
7803
                &$changeCallback( $ai++, $bi++, @_ );
7804
            }
7805
            else
7806
            {
7807
                &$discardACallback( $ai++, $bi, @_ );
7808
                &$discardBCallback( $ai, $bi++, @_ );
7809
            }
7810
        }
7811
        elsif ( $ai <= $lastA )
7812
        {
7813
            &$discardACallback( $ai++, $bi, @_ );
7814
        }
7815
        else
7816
        {
7817
 
7818
            # $bi <= $lastB
7819
            &$discardBCallback( $ai, $bi++, @_ );
7820
        }
7821
    }
7822
 
7823
    return 1;
7824
}
7825
 
7826
sub prepare
7827
{
7828
    my $a       = shift;    # array ref
7829
    my $keyGen  = shift;    # code ref
7830
 
7831
    # set up code ref
7832
    $keyGen = sub { $_[0] } unless defined($keyGen);
7833
 
7834
    return scalar _withPositionsOfInInterval( $a, 0, $#$a, $keyGen, @_ );
7835
}
7836
 
7837
sub LCS
7838
{
7839
    my $a = shift;                  # array ref
7840
    my $b = shift;                  # array ref or hash ref
7841
    my $matchVector = _longestCommonSubsequence( $a, $b, 0, @_ );
7842
    my @retval;
7843
    my $i;
7844
    for ( $i = 0 ; $i <= $#$matchVector ; $i++ )
7845
    {
7846
        if ( defined( $matchVector->[$i] ) )
7847
        {
7848
            push ( @retval, $a->[$i] );
7849
        }
7850
    }
7851
    return wantarray ? @retval : \@retval;
7852
}
7853
 
7854
sub LCS_length
7855
{
7856
    my $a = shift;                          # array ref
7857
    my $b = shift;                          # array ref or hash ref
7858
    return _longestCommonSubsequence( $a, $b, 1, @_ );
7859
}
7860
 
7861
sub LCSidx
7862
{
7863
    my $a= shift @_;
7864
    my $b= shift @_;
7865
    my $match= _longestCommonSubsequence( $a, $b, 0, @_ );
7866
    my @am= grep defined $match->[$_], 0..$#$match;
7867
    my @bm= @{$match}[@am];
7868
    return \@am, \@bm;
7869
}
7870
 
7871
sub compact_diff
7872
{
7873
    my $a= shift @_;
7874
    my $b= shift @_;
7875
    my( $am, $bm )= LCSidx( $a, $b, @_ );
7876
    my @cdiff;
7877
    my( $ai, $bi )= ( 0, 0 );
7878
    push @cdiff, $ai, $bi;
7879
    while( 1 ) {
7880
        while(  @$am  &&  $ai == $am->[0]  &&  $bi == $bm->[0]  ) {
7881
            shift @$am;
7882
            shift @$bm;
7883
            ++$ai, ++$bi;
7884
        }
7885
        push @cdiff, $ai, $bi;
7886
        last   if  ! @$am;
7887
        $ai = $am->[0];
7888
        $bi = $bm->[0];
7889
        push @cdiff, $ai, $bi;
7890
    }
7891
    push @cdiff, 0+@$a, 0+@$b
7892
        if  $ai < @$a || $bi < @$b;
7893
    return wantarray ? @cdiff : \@cdiff;
7894
}
7895
 
7896
sub diff
7897
{
7898
    my $a      = shift;    # array ref
7899
    my $b      = shift;    # array ref
7900
    my $retval = [];
7901
    my $hunk   = [];
7902
    my $discard = sub {
7903
        push @$hunk, [ '-', $_[0], $a->[ $_[0] ] ];
7904
    };
7905
    my $add = sub {
7906
        push @$hunk, [ '+', $_[1], $b->[ $_[1] ] ];
7907
    };
7908
    my $match = sub {
7909
        push @$retval, $hunk
7910
            if 0 < @$hunk;
7911
        $hunk = []
7912
    };
7913
    traverse_sequences( $a, $b,
7914
        { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ );
7915
    &$match();
7916
    return wantarray ? @$retval : $retval;
7917
}
7918
 
7919
sub sdiff
7920
{
7921
    my $a      = shift;    # array ref
7922
    my $b      = shift;    # array ref
7923
    my $retval = [];
7924
    my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) };
7925
    my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) };
7926
    my $change = sub {
7927
        push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] );
7928
    };
7929
    my $match = sub {
7930
        push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] );
7931
    };
7932
    traverse_balanced(
7933
        $a,
7934
        $b,
7935
        {
7936
            MATCH     => $match,
7937
            DISCARD_A => $discard,
7938
            DISCARD_B => $add,
7939
            CHANGE    => $change,
7940
        },
7941
        @_
7942
    );
7943
    return wantarray ? @$retval : $retval;
7944
}
7945
 
7946
########################################
7947
my $Root= __PACKAGE__;
7948
package Algorithm::Diff::_impl;
7949
use strict;
7950
 
7951
sub _Idx()  { 0 } # $me->[_Idx]: Ref to array of hunk indices
7952
            # 1   # $me->[1]: Ref to first sequence
7953
            # 2   # $me->[2]: Ref to second sequence
7954
sub _End()  { 3 } # $me->[_End]: Diff between forward and reverse pos
7955
sub _Same() { 4 } # $me->[_Same]: 1 if pos 1 contains unchanged items
7956
sub _Base() { 5 } # $me->[_Base]: Added to range's min and max
7957
sub _Pos()  { 6 } # $me->[_Pos]: Which hunk is currently selected
7958
sub _Off()  { 7 } # $me->[_Off]: Offset into _Idx for current position
7959
sub _Min() { -2 } # Added to _Off to get min instead of max+1
7960
 
7961
sub Die
7962
{
7963
    require Carp;
7964
    Carp::confess( @_ );
7965
}
7966
 
7967
sub _ChkPos
7968
{
7969
    my( $me )= @_;
7970
    return   if  $me->[_Pos];
7971
    my $meth= ( caller(1) )[3];
7972
    Die( "Called $meth on 'reset' object" );
7973
}
7974
 
7975
sub _ChkSeq
7976
{
7977
    my( $me, $seq )= @_;
7978
    return $seq + $me->[_Off]
7979
        if  1 == $seq  ||  2 == $seq;
7980
    my $meth= ( caller(1) )[3];
7981
    Die( "$meth: Invalid sequence number ($seq); must be 1 or 2" );
7982
}
7983
 
7984
sub getObjPkg
7985
{
7986
    my( $us )= @_;
7987
    return ref $us   if  ref $us;
7988
    return $us . "::_obj";
7989
}
7990
 
7991
sub new
7992
{
7993
    my( $us, $seq1, $seq2, $opts ) = @_;
7994
    my @args;
7995
    for( $opts->{keyGen} ) {
7996
        push @args, $_   if  $_;
7997
    }
7998
    for( $opts->{keyGenArgs} ) {
7999
        push @args, @$_   if  $_;
8000
    }
8001
    my $cdif= Algorithm::Diff::compact_diff( $seq1, $seq2, @args );
8002
    my $same= 1;
8003
    if(  0 == $cdif->[2]  &&  0 == $cdif->[3]  ) {
8004
        $same= 0;
8005
        splice @$cdif, 0, 2;
8006
    }
8007
    my @obj= ( $cdif, $seq1, $seq2 );
8008
    $obj[_End] = (1+@$cdif)/2;
8009
    $obj[_Same] = $same;
8010
    $obj[_Base] = 0;
8011
    my $me = bless \@obj, $us->getObjPkg();
8012
    $me->Reset( 0 );
8013
    return $me;
8014
}
8015
 
8016
sub Reset
8017
{
8018
    my( $me, $pos )= @_;
8019
    $pos= int( $pos || 0 );
8020
    $pos += $me->[_End]
8021
        if  $pos < 0;
8022
    $pos= 0
8023
        if  $pos < 0  ||  $me->[_End] <= $pos;
8024
    $me->[_Pos]= $pos || !1;
8025
    $me->[_Off]= 2*$pos - 1;
8026
    return $me;
8027
}
8028
 
8029
sub Base
8030
{
8031
    my( $me, $base )= @_;
8032
    my $oldBase= $me->[_Base];
8033
    $me->[_Base]= 0+$base   if  defined $base;
8034
    return $oldBase;
8035
}
8036
 
8037
sub Copy
8038
{
8039
    my( $me, $pos, $base )= @_;
8040
    my @obj= @$me;
8041
    my $you= bless \@obj, ref($me);
8042
    $you->Reset( $pos )   if  defined $pos;
8043
    $you->Base( $base );
8044
    return $you;
8045
}
8046
 
8047
sub Next {
8048
    my( $me, $steps )= @_;
8049
    $steps= 1   if  ! defined $steps;
8050
    if( $steps ) {
8051
        my $pos= $me->[_Pos];
8052
        my $new= $pos + $steps;
8053
        $new= 0   if  $pos  &&  $new < 0;
8054
        $me->Reset( $new )
8055
    }
8056
    return $me->[_Pos];
8057
}
8058
 
8059
sub Prev {
8060
    my( $me, $steps )= @_;
8061
    $steps= 1   if  ! defined $steps;
8062
    my $pos= $me->Next(-$steps);
8063
    $pos -= $me->[_End]   if  $pos;
8064
    return $pos;
8065
}
8066
 
8067
sub Diff {
8068
    my( $me )= @_;
8069
    $me->_ChkPos();
8070
    return 0   if  $me->[_Same] == ( 1 & $me->[_Pos] );
8071
    my $ret= 0;
8072
    my $off= $me->[_Off];
8073
    for my $seq ( 1, 2 ) {
8074
        $ret |= $seq
8075
            if  $me->[_Idx][ $off + $seq + _Min ]
8076
            <   $me->[_Idx][ $off + $seq ];
8077
    }
8078
    return $ret;
8079
}
8080
 
8081
sub Min {
8082
    my( $me, $seq, $base )= @_;
8083
    $me->_ChkPos();
8084
    my $off= $me->_ChkSeq($seq);
8085
    $base= $me->[_Base] if !defined $base;
8086
    return $base + $me->[_Idx][ $off + _Min ];
8087
}
8088
 
8089
sub Max {
8090
    my( $me, $seq, $base )= @_;
8091
    $me->_ChkPos();
8092
    my $off= $me->_ChkSeq($seq);
8093
    $base= $me->[_Base] if !defined $base;
8094
    return $base + $me->[_Idx][ $off ] -1;
8095
}
8096
 
8097
sub Range {
8098
    my( $me, $seq, $base )= @_;
8099
    $me->_ChkPos();
8100
    my $off = $me->_ChkSeq($seq);
8101
    if( !wantarray ) {
8102
        return  $me->[_Idx][ $off ]
8103
            -   $me->[_Idx][ $off + _Min ];
8104
    }
8105
    $base= $me->[_Base] if !defined $base;
8106
    return  ( $base + $me->[_Idx][ $off + _Min ] )
8107
        ..  ( $base + $me->[_Idx][ $off ] - 1 );
8108
}
8109
 
8110
sub Items {
8111
    my( $me, $seq )= @_;
8112
    $me->_ChkPos();
8113
    my $off = $me->_ChkSeq($seq);
8114
    if( !wantarray ) {
8115
        return  $me->[_Idx][ $off ]
8116
            -   $me->[_Idx][ $off + _Min ];
8117
    }
8118
    return
8119
        @{$me->[$seq]}[
8120
                $me->[_Idx][ $off + _Min ]
8121
            ..  ( $me->[_Idx][ $off ] - 1 )
8122
        ];
8123
}
8124
 
8125
sub Same {
8126
    my( $me )= @_;
8127
    $me->_ChkPos();
8128
    return wantarray ? () : 0
8129
        if  $me->[_Same] != ( 1 & $me->[_Pos] );
8130
    return $me->Items(1);
8131
}
8132
 
8133
my %getName;
8134
BEGIN {
8135
    %getName= (
8136
        same => \&Same,
8137
        diff => \&Diff,
8138
        base => \&Base,
8139
        min  => \&Min,
8140
        max  => \&Max,
8141
        range=> \&Range,
8142
        items=> \&Items, # same thing
8143
    );
8144
}
8145
 
8146
sub Get
8147
{
8148
    my $me= shift @_;
8149
    $me->_ChkPos();
8150
    my @value;
8151
    for my $arg (  @_  ) {
8152
        for my $word (  split ' ', $arg  ) {
8153
            my $meth;
8154
            if(     $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/
8155
                ||  not  $meth= $getName{ lc $2 }
8156
            ) {
8157
                Die( $Root, ", Get: Invalid request ($word)" );
8158
            }
8159
            my( $base, $name, $seq )= ( $1, $2, $3 );
8160
            push @value, scalar(
8161
                4 == length($name)
8162
                    ? $meth->( $me )
8163
                    : $meth->( $me, $seq, $base )
8164
            );
8165
        }
8166
    }
8167
    if(  wantarray  ) {
8168
        return @value;
8169
    } elsif(  1 == @value  ) {
8170
        return $value[0];
8171
    }
8172
    Die( 0+@value, " values requested from ",
8173
        $Root, "'s Get in scalar context" );
8174
}
8175
 
8176
 
8177
my $Obj= getObjPkg($Root);
8178
no strict 'refs';
8179
 
8180
for my $meth (  qw( new getObjPkg )  ) {
8181
    *{$Root."::".$meth} = \&{$meth};
8182
    *{$Obj ."::".$meth} = \&{$meth};
8183
}
8184
for my $meth (  qw(
8185
    Next Prev Reset Copy Base Diff
8186
    Same Items Range Min Max Get
8187
    _ChkPos _ChkSeq
8188
)  ) {
8189
    *{$Obj."::".$meth} = \&{$meth};
8190
}
8191
 
8192
1;
8193
# This version released by Tye McQueen (http://perlmonks.org/?node=tye).
8194
# 
8195
# =head1 LICENSE
8196
# 
8197
# Parts Copyright (c) 2000-2004 Ned Konz.  All rights reserved.
8198
# Parts by Tye McQueen.
8199
# 
8200
# This program is free software; you can redistribute it and/or modify it
8201
# under the same terms as Perl.
8202
# 
8203
# =head1 MAILING LIST
8204
# 
8205
# Mark-Jason still maintains a mailing list.  To join a low-volume mailing
8206
# list for announcements related to diff and Algorithm::Diff, send an
8207
# empty mail message to mjd-perl-diff-request@plover.com.
8208
# =head1 CREDITS
8209
# 
8210
# Versions through 0.59 (and much of this documentation) were written by:
8211
# 
8212
# Mark-Jason Dominus, mjd-perl-diff@plover.com
8213
# 
8214
# This version borrows some documentation and routine names from
8215
# Mark-Jason's, but Diff.pm's code was completely replaced.
8216
# 
8217
# This code was adapted from the Smalltalk code of Mario Wolczko
8218
# <mario@wolczko.com>, which is available at
8219
# ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st
8220
# 
8221
# C<sdiff> and C<traverse_balanced> were written by Mike Schilli
8222
# <m@perlmeister.com>.
8223
# 
8224
# The algorithm is that described in
8225
# I<A Fast Algorithm for Computing Longest Common Subsequences>,
8226
# CACM, vol.20, no.5, pp.350-353, May 1977, with a few
8227
# minor improvements to improve the speed.
8228
# 
8229
# Much work was done by Ned Konz (perl@bike-nomad.com).
8230
# 
8231
# The OO interface and some other changes are by Tye McQueen.
8232
# 
8233
EOAlgDiff
8234
# 2}}}
8235
    my $problems        = 0;
8236
    $HAVE_Algorith_Diff = 0;
8237
    my $dir             = "";
8238
    if ($opt_sdir) {
8239
        ++$TEMP_OFF;
8240
        $dir = "$opt_sdir/$TEMP_OFF";
8241
        File::Path::rmtree($dir) if     is_dir($dir);
8242
        File::Path::mkpath($dir) unless is_dir($dir);
8243
    } else {
8244
        # let File::Temp create a suitable temporary directory
8245
        $dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
8246
        $TEMP_INST{ $dir } = "Algorithm::Diff";
8247
    }
8248
    print "Using temp dir [$dir] to install Algorithm::Diff\n" if $opt_v;
8249
    my $Algorithm_dir      = "$dir/Algorithm";
8250
    my $Algorithm_Diff_dir = "$dir/Algorithm/Diff";
8251
    mkdir $Algorithm_dir     ;
8252
    mkdir $Algorithm_Diff_dir;
8253
 
8254
    my $OUT = new IO::File "$dir/Algorithm/Diff.pm", "w";
8255
    if (defined $OUT) {
8256
        print $OUT $Algorithm_Diff_Contents;
8257
        $OUT->close;
8258
    } else {
8259
        warn "Failed to install Algorithm/Diff.pm\n";
8260
        $problems = 1;
8261
    }
8262
 
8263
    push @INC, $dir;  # between this & Regexp::Common only need to do once
8264
    eval "use Algorithm::Diff qw / sdiff /";
8265
    $HAVE_Algorith_Diff = 1 unless $problems;
8266
} # 1}}}
8267
sub call_regexp_common {                     # {{{1
8268
    my ($ra_lines, $language ) = @_;
8269
    print "-> call_regexp_common\n" if $opt_v > 2;
8270
 
8271
    Install_Regexp_Common() unless $HAVE_Rexexp_Common;
8272
 
8273
    my $all_lines = join("", @{$ra_lines});
8274
 
8275
    no strict 'vars';
8276
    # otherwise get:
8277
    #  Global symbol "%RE" requires explicit package name at cloc line xx.
8278
    if ($all_lines =~ $RE{comment}{$language}) {
8279
        # Suppress "Use of uninitialized value in regexp compilation" that
8280
        # pops up when $1 is undefined--happens if there's a bug in the $RE
8281
        # This Pascal comment will trigger it:
8282
        #         (* This is { another } test. **)
8283
        # Curiously, testing for "defined $1" breaks the substitution.
8284
        no warnings; 
8285
        # remove   comments
8286
        $all_lines =~ s/$1//g;
8287
    }
8288
    # a bogus use of %RE to avoid:
8289
    # Name "main::RE" used only once: possible typo at cloc line xx.
8290
    print scalar keys %RE if $opt_v < -20;
8291
#?#print "$all_lines\n";
8292
    print "<- call_regexp_common\n" if $opt_v > 2;
8293
    return split("\n", $all_lines);
8294
} # 1}}}
8295
sub plural_form {                            # {{{1
8296
    # For getting the right plural form on some English nouns.
8297
    my $n = shift @_;
8298
    if ($n == 1) { return ( 1, "" ); }
8299
    else         { return ($n, "s"); }
8300
} # 1}}}
8301
sub matlab_or_objective_C {                  # {{{1
8302
    # Decide if code is MATLAB, Objective C, MUMPS, or Mercury
8303
    my ($file        , # in
8304
        $rh_Err      , # in   hash of error codes
8305
        $raa_errors  , # out
8306
        $rs_language , # out
8307
       ) = @_;
8308
    print "-> matlab_or_objective_C\n" if $opt_v > 2;
8309
    # matlab markers:
8310
    #   first line starts with "function"
8311
    #   some lines start with "%"
8312
    #   high marks for lines that start with [
8313
    #
8314
    # Objective C markers:
8315
    #   must have at least two brace characters, { }
8316
    #   has /* ... */ style comments
8317
    #   some lines start with @
8318
    #   some lines start with #include
8319
    #
8320
    # MUMPS:
8321
    #   has ; comment markers
8322
    #   do not match:  \w+\s*=\s*\w
8323
    #   lines begin with   \s*\.?\w+\s+\w
8324
    #   high marks for lines that start with \s*K\s+ or \s*Kill\s+
8325
    #
8326
    # Mercury:
8327
    #   any line that begins with :- immediately triggers this 
8328
 
8329
    ${$rs_language} = "";
8330
    my $IN = new IO::File $file, "r";
8331
    if (!defined $IN) {
8332
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8333
        return;
8334
    }
8335
 
8336
    my $DEBUG              = 0;
8337
 
8338
    my $matlab_points      = 0;
8339
    my $objective_C_points = 0;
8340
    my $mumps_points       = 0;
8341
    my $mercury_points     = 0;
8342
    my $has_braces         = 0;
8343
    while (<$IN>) {
8344
        ++$has_braces if $_ =~ m/[{}]/;
8345
#print "LINE $. has_braces=$has_braces\n";
8346
        ++$mumps_points if $. == 1 and m{^[A-Z]};
8347
        if      (m{^\s*/\*} or m {^\s*//}) {   #   /* or //
8348
            $objective_C_points += 5;
8349
            $matlab_points      -= 5;
8350
printf ".m:  /*|//  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8351
        } elsif (m{^:-\s+}) {      # gotta be mercury
8352
            $mercury_points = 1000;
8353
            last;
8354
        } elsif (m{\w+\s*=\s*\[}) {      # matrix assignment, very matlab
8355
            $matlab_points += 5;
8356
printf ".m:  \\w=[   obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8357
        } elsif (m{^\s*\w+\s*=\s*}) {    # definitely not MUMPS
8358
            --$mumps_points;
8359
printf ".m:  \\w=    obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8360
        } elsif (m{^\s*\.?(\w)\s+(\w)} and $1 !~ /\d/ and $2 !~ /\d/) {
8361
            ++$mumps_points;
8362
printf ".m:  \\w \\w  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8363
        } elsif (m{^\s*;}) {
8364
            ++$mumps_points;
8365
printf ".m:  ;      obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8366
        } elsif (m{^\s*#(include|import)}) {
8367
            # Objective C without a doubt
8368
            $objective_C_points = 1000;
8369
            $matlab_points      = 0;
8370
printf ".m: #includ obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8371
            $has_braces         = 2;
8372
            last;
8373
        } elsif (m{^\s*@(interface|implementation|protocol|public|protected|private|end)\s}o) {
8374
            # Objective C without a doubt
8375
            $objective_C_points = 1000;
8376
            $matlab_points      = 0;
8377
printf ".m: keyword obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8378
            last;
8379
        } elsif (m{^\s*\[}) {             #   line starts with [  -- very matlab
8380
            $matlab_points += 5;
8381
printf ".m:  [      obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8382
        } elsif (m{^\sK(ill)?\s+}) {
8383
            $mumps_points  += 5;
8384
printf ".m:  Kill   obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8385
        } elsif (m{^\s*function}) {
8386
            --$objective_C_points;
8387
            ++$matlab_points;
8388
printf ".m:  funct  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8389
        } elsif (m{^\s*%}) {              #   %
8390
            # matlab commented line
8391
            --$objective_C_points;
8392
            ++$matlab_points;
8393
printf ".m:  pcent  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8394
        }
8395
    }
8396
    $IN->close;
8397
printf "END LOOP    obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
8398
 
8399
    # next heuristic is unreliable for small files
8400
#   $objective_C_points = -9.9e20 unless $has_braces >= 2;
8401
 
8402
    my %points = ( 'MATLAB'      => $matlab_points     ,
8403
                   'MUMPS'       => $mumps_points      ,
8404
                   'Objective C' => $objective_C_points,
8405
                   'Mercury'     => $mercury_points    , );
8406
 
8407
    ${$rs_language} = (sort { $points{$b} <=> $points{$a}} keys %points)[0];
8408
 
8409
    print "<- matlab_or_objective_C($file: matlab=$matlab_points, C=$objective_C_points, mumps=$mumps_points, mercury=$mercury_points) => ${$rs_language}\n"
8410
        if $opt_v > 2;
8411
 
8412
} # 1}}}
8413
sub Lisp_or_OpenCL {                         # {{{1
8414
    my ($file        , # in
8415
        $rh_Err      , # in   hash of error codes
8416
        $raa_errors  , # out
8417
       ) = @_;
8418
 
8419
    print "-> Lisp_or_OpenCL\n" if $opt_v > 2;
8420
 
8421
    my $lang = undef;
8422
    my $IN = new IO::File $file, "r";
8423
    if (!defined $IN) {
8424
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8425
        return $lang;
8426
    }
8427
    my $lisp_points   = 0;
8428
    my $opcl_points = 0;
8429
    while (<$IN>) {
8430
        ++$lisp_points if  /^\s*;/;
8431
        ++$lisp_points if  /\((def|eval|require|export|let|loop|dec|format)/;
8432
        ++$opcl_points if  /^\s*(int|float|const|{)/;
8433
    }
8434
    $IN->close;
8435
    # print "lisp_points=$lisp_points   opcl_points=$opcl_points\n";
8436
    if ($lisp_points > $opcl_points) {
8437
        $lang = "Lisp";
8438
    } else {
8439
        $lang = "OpenCL";
8440
    }
8441
 
8442
    print "<- Lisp_or_OpenCL\n" if $opt_v > 2;
8443
    return $lang;
8444
} # 1}}}
8445
sub Lisp_or_Julia {                          # {{{1
8446
    my ($file        , # in
8447
        $rh_Err      , # in   hash of error codes
8448
        $raa_errors  , # out
8449
       ) = @_;
8450
 
8451
    print "-> Lisp_or_Julia\n" if $opt_v > 2;
8452
 
8453
    my $lang = undef;
8454
    my $IN = new IO::File $file, "r";
8455
    if (!defined $IN) {
8456
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8457
        return $lang;
8458
    }
8459
    my $lisp_points   = 0;
8460
    my $julia_points = 0;
8461
    while (<$IN>) {
8462
        ++$lisp_points if  /^\s*;/;
8463
        ++$lisp_points if  /\((def|eval|require|export|let|loop|dec|format)/;
8464
        ++$julia_points if  /^\s*(function|end|println|for|while)/;
8465
    }
8466
    $IN->close;
8467
    # print "lisp_points=$lisp_points   julia_points=$julia_points\n";
8468
    if ($lisp_points > $julia_points) {
8469
        $lang = "Lisp";
8470
    } else {
8471
        $lang = "Julia";
8472
    }
8473
 
8474
    print "<- Lisp_or_Julia\n" if $opt_v > 2;
8475
    return $lang;
8476
} # 1}}}
8477
sub Perl_or_Prolog {                         # {{{1
8478
    my ($file        , # in
8479
        $rh_Err      , # in   hash of error codes
8480
        $raa_errors  , # out
8481
       ) = @_;
8482
 
8483
    print "-> Perl_or_Prolog\n" if $opt_v > 2;
8484
 
8485
    my $lang = undef;
8486
    my $IN = new IO::File $file, "r";
8487
    if (!defined $IN) {
8488
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8489
        return $lang;
8490
    }
8491
    my $perl_points = 0;
8492
    my $prolog_points = 0;
8493
    while (<$IN>) {
8494
        ++$perl_points if  /;\s*$/;
8495
        ++$perl_points if  /({|})/;
8496
        ++$perl_points if  /^\s*sub\s+/;
8497
        ++$prolog_points if /\.\s*$/;
8498
        ++$prolog_points if /:-/;
8499
    }
8500
    $IN->close;
8501
    # print "perl_points=$perl_points   prolog_points=$prolog_points\n";
8502
    if ($perl_points > $prolog_points) {
8503
        $lang = "Perl";
8504
    } else {
8505
        $lang = "Prolog";
8506
    }
8507
 
8508
    print "<- Perl_or_Prolog\n" if $opt_v > 2;
8509
    return $lang;
8510
} # 1}}}
8511
sub IDL_or_QtProject {                         # {{{1
8512
    # also Prolog
8513
    my ($file        , # in
8514
        $rh_Err      , # in   hash of error codes
8515
        $raa_errors  , # out
8516
       ) = @_;
8517
 
8518
    print "-> IDL_or_QtProject($file)\n" if $opt_v > 2;
8519
 
8520
    my $lang = undef;
8521
    my $IN = new IO::File $file, "r";
8522
    if (!defined $IN) {
8523
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8524
        return $lang;
8525
    }
8526
    my $idl_points    = 0;
8527
    my $qtproj_points = 0;
8528
    my $prolog_points = 0;
8529
    while (<$IN>) {
8530
        ++$idl_points    if /^\s*;/;
8531
        ++$idl_points    if /plot\(/i;
8532
        ++$qtproj_points if /^\s*(qt|configs|sources)\s*\+?=/i;
8533
        ++$prolog_points if /\.\s*$/;
8534
        ++$prolog_points if /:-/;
8535
    }
8536
    $IN->close;
8537
    # print "idl_points=$idl_points   qtproj_points=$qtproj_points\n";
8538
 
8539
    if ($idl_points > $qtproj_points) {
8540
        $lang = "IDL";
8541
    } else {
8542
        $lang = "Qt Project";
8543
    }
8544
 
8545
    my %points = ( 'IDL'        => $idl_points     ,
8546
                   'Qt Project' => $qtproj_points  ,
8547
                   'Prolog'     => $prolog_points  , );
8548
 
8549
    $lang = (sort { $points{$b} <=> $points{$a}} keys %points)[0];
8550
 
8551
    print "<- IDL_or_QtProject(idl_points=$idl_points, ",
8552
          "qtproj_points=$qtproj_points, prolog_points=$prolog_points)\n" 
8553
           if $opt_v > 2;
8554
    return $lang;
8555
} # 1}}}
8556
sub Ant_or_XML {                             # {{{1
8557
    my ($file        , # in
8558
        $rh_Err      , # in   hash of error codes
8559
        $raa_errors  , # out
8560
       ) = @_;
8561
 
8562
    print "-> Ant_or_XML($file)\n" if $opt_v > 2;
8563
 
8564
    my $lang = "XML";
8565
    my $IN = new IO::File $file, "r";
8566
    if (!defined $IN) {
8567
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8568
        return $lang;
8569
    }
8570
    my $Ant_points   = 0;
8571
    my $XML_points   = 1;
8572
    while (<$IN>) {
8573
        if (/^\s*<project\s+/) {
8574
            ++$Ant_points  ;
8575
            --$XML_points  ;
8576
        }
8577
        if (/xmlns:artifact="antlib:org.apache.maven.artifact.ant"/) {
8578
            ++$Ant_points  ;
8579
            --$XML_points  ;
8580
        }
8581
    }
8582
    $IN->close;
8583
 
8584
    if ($XML_points >= $Ant_points) {
8585
        # tie or better goes to XML
8586
        $lang = "XML";
8587
    } else {
8588
        $lang = "Ant";
8589
    }
8590
 
8591
    print "<- Ant_or_XML($lang)\n" if $opt_v > 2;
8592
    return $lang;
8593
} # 1}}}
8594
sub Maven_or_XML {                           # {{{1
8595
    my ($file        , # in
8596
        $rh_Err      , # in   hash of error codes
8597
        $raa_errors  , # out
8598
       ) = @_;
8599
 
8600
    print "-> Maven_or_XML($file)\n" if $opt_v > 2;
8601
 
8602
    my $lang = "XML";
8603
    my $IN = new IO::File $file, "r";
8604
    if (!defined $IN) {
8605
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8606
        return $lang;
8607
    }
8608
    my $Mvn_points   = 0;
8609
    my $XML_points   = 1;
8610
    while (<$IN>) {
8611
        if (/^\s*<project\s+/) {
8612
            ++$Mvn_points  ;
8613
            --$XML_points  ;
8614
        }
8615
        if (m{xmlns="http://maven.apache.org/POM/}) {
8616
            ++$Mvn_points  ;
8617
            --$XML_points  ;
8618
        }
8619
    }
8620
    $IN->close;
8621
 
8622
    if ($XML_points >= $Mvn_points) {
8623
        # tie or better goes to XML
8624
        $lang = "XML";
8625
    } else {
8626
        $lang = "Maven";
8627
    }
8628
 
8629
    print "<- Maven_or_XML($lang)\n" if $opt_v > 2;
8630
    return $lang;
8631
} # 1}}}
8632
sub pascal_or_puppet {                       # {{{1
8633
    # Decide if code is Pascal or Puppet manifest
8634
    my ($file        , # in
8635
        $rh_Err      , # in   hash of error codes
8636
        $raa_errors  , # out
8637
        $rs_language , # out
8638
       ) = @_;
8639
 
8640
    print "-> pascal_or_puppet\n" if $opt_v > 2;
8641
 
8642
    ${$rs_language} = "";
8643
    my $IN = new IO::File $file, "r";
8644
    if (!defined $IN) {
8645
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
8646
        return;
8647
    }
8648
 
8649
    my $DEBUG              = 0;
8650
    my $pascal_points      = 0;
8651
    my $puppet_points      = 0;
8652
 
8653
    while (<$IN>) {
8654
        ++$pascal_points if /\bprogram\s+[A-Za-z]/i;
8655
        ++$pascal_points if /\bunit\s+[A-Za-z]/i;
8656
        ++$pascal_points if /\bmodule\s+[A-Za-z]/i;
8657
        ++$pascal_points if /\bprocedure\b/i;
8658
        ++$pascal_points if /\bfunction\b/i;
8659
        ++$pascal_points if /^\s*interface\s+/i;
8660
        ++$pascal_points if /^\s*implementation\s+/i;
8661
        ++$pascal_points if /\bbegin\b/i;
8662
        ++$pascal_points if /\bend\b/i;
8663
 
8664
        ++$puppet_points if /^\s*class\s+/;
8665
        ++$puppet_points if /^\s*case\s+/;
8666
        ++$puppet_points if /^\s*package\s+/;
8667
        ++$puppet_points if /^\s*file\s+/;
8668
        ++$puppet_points if /^\s*service\s+/;
8669
    }
8670
    $IN->close;
8671
 
8672
    print "<- pascal_or_puppet(pascal=$pascal_points, puppet=$puppet_points)\n"
8673
        if $opt_v > 2;
8674
 
8675
    if ($pascal_points > $puppet_points) {
8676
        ${$rs_language} = "Pascal";
8677
    } else {
8678
        ${$rs_language} = "Puppet";
8679
    }
8680
 
8681
} # 1}}}
8682
sub html_colored_text {                      # {{{1
8683
    # http://www.pagetutor.com/pagetutor/makapage/pics/net216-2.gif
8684
    my ($color, $text) = @_;
8685
#?#die "html_colored_text($text)";
8686
    if      ($color =~ /^red$/i)   {
8687
        $color = "#ff0000";
8688
    } elsif ($color =~ /^green$/i) {
8689
        $color = "#00ff00";
8690
    } elsif ($color =~ /^blue$/i)  {
8691
        $color = "#0000ff";
8692
    } elsif ($color =~ /^grey$/i)  {
8693
        $color = "#cccccc";
8694
    }
8695
#   return "" unless $text;
8696
    return '<font color="' . $color . '">' . html_metachars($text) . "</font>";
8697
} # 1}}}
8698
sub xml_metachars {                          # {{{1
8699
    # http://en.wikipedia.org/wiki/Character_encodings_in_HTML#XML_character_references
8700
    my ($string, ) = shift @_;
8701
 
8702
    my  @in_chars    = split(//, $string);
8703
    my  @out_chars   = ();
8704
    foreach my $c (@in_chars) {
8705
        if      ($c eq '&') { push @out_chars, '&amp;'
8706
        } elsif ($c eq '<') { push @out_chars, '&lt;'
8707
        } elsif ($c eq '>') { push @out_chars, '&gt;'
8708
        } elsif ($c eq '"') { push @out_chars, '&quot;'
8709
        } elsif ($c eq "'") { push @out_chars, '&apos;'
8710
        } else {
8711
            push @out_chars, $c;
8712
        }
8713
    }
8714
    return join "", @out_chars; 
8715
} # 1}}}
8716
sub html_metachars {                         # {{{1
8717
    # Replace HTML metacharacters with their printable forms.
8718
    # Future:  use HTML-Encoder-0.00_04/lib/HTML/Encoder.pm
8719
    # from Fabiano Reese Righetti's HTML::Encoder module if 
8720
    # this subroutine proves to be too simplistic.
8721
    my ($string, ) = shift @_;
8722
 
8723
    my  @in_chars    = split(//, $string);
8724
    my  @out_chars   = ();
8725
    foreach my $c (@in_chars) {
8726
        if      ($c eq '<') {
8727
            push @out_chars, '&lt;'
8728
        } elsif ($c eq '>') {
8729
            push @out_chars, '&gt;'
8730
        } elsif ($c eq '&') {
8731
            push @out_chars, '&amp;'
8732
        } else {
8733
            push @out_chars, $c;
8734
        }
8735
    }
8736
    return join "", @out_chars; 
8737
} # 1}}}
8738
sub test_alg_diff {                          # {{{1
8739
    my ($file_1 ,
8740
        $file_2 )
8741
       = @_;
8742
    my $fh_1 = new IO::File $file_1, "r";
8743
    die "Unable to read $file_1:  $!\n" unless defined $fh_1;
8744
    chomp(my @lines_1 = <$fh_1>);
8745
    $fh_1->close;
8746
 
8747
    my $fh_2 = new IO::File $file_2, "r";
8748
    die "Unable to read $file_2:  $!\n" unless defined $fh_2;
8749
    chomp(my @lines_2 = <$fh_2>);
8750
    $fh_2->close;
8751
 
8752
    my $n_no_change = 0;
8753
    my $n_modified  = 0;
8754
    my $n_added     = 0;
8755
    my $n_deleted   = 0;
8756
    my @min_sdiff   = ();
8757
my $NN = chr(27) . "[0m";  # normal
8758
my $BB = chr(27) . "[1m";  # bold
8759
 
8760
    my @sdiffs = sdiff( \@lines_1, \@lines_2 );
8761
    foreach my $entry (@sdiffs) {
8762
        my ($out_1, $out_2) = ('', '');
8763
        if ($entry->[0] eq 'u') { 
8764
            ++$n_no_change; 
8765
          # $out_1 = $entry->[1];
8766
          # $out_2 = $entry->[2];
8767
            next; 
8768
        }
8769
#       push @min_sdiff, $entry;
8770
        if      ($entry->[0] eq 'c') { 
8771
            ++$n_modified;
8772
            ($out_1, $out_2) = diff_two_strings($entry->[1], $entry->[2]);
8773
            $out_1 =~ s/\cA(\w)/${BB}$1${NN}/g;
8774
            $out_2 =~ s/\cA(\w)/${BB}$1${NN}/g;
8775
          # $out_1 =~ s/\cA//g;
8776
          # $out_2 =~ s/\cA//g;
8777
        } elsif ($entry->[0] eq '+') { 
8778
            ++$n_added;
8779
            $out_1 = $entry->[1];
8780
            $out_2 = $entry->[2];
8781
        } elsif ($entry->[0] eq '-') { 
8782
            ++$n_deleted;
8783
            $out_1 = $entry->[1];
8784
            $out_2 = $entry->[2];
8785
        } elsif ($entry->[0] eq 'u') { 
8786
        } else { die "unknown entry->[0]=[$entry->[0]]\n"; }
8787
        printf "%-80s | %s\n", $out_1, $out_2;
8788
    }
8789
 
8790
#   foreach my $entry (@min_sdiff) {
8791
#       printf "DIFF:  %s  %s  %s\n", @{$entry};
8792
#   }
8793
} # 1}}}
8794
sub write_comments_to_html {                 # {{{1
8795
    my ($filename      , # in
8796
        $rah_diff_L    , # in  see routine array_diff() for explanation
8797
        $rah_diff_R    , # in  see routine array_diff() for explanation
8798
        $rh_blank      , # in  location and counts of blank lines
8799
       ) = @_;
8800
 
8801
    print "-> write_comments_to_html($filename)\n" if $opt_v > 2;
8802
    my $file = $filename . ".html";
8803
#use Data::Dumper;
8804
#print Dumper("rah_diff_L", $rah_diff_L, "rah_diff_R", $rah_diff_R);
8805
    my $OUT = new IO::File $file, "w";
8806
    if (!defined $OUT) {
8807
        warn "Unable to write to $file\n";
8808
        print "<- write_comments_to_html\n" if $opt_v > 2;
8809
        return;
8810
    }
8811
 
8812
    my $approx_line_count = scalar @{$rah_diff_L};
8813
    my $n_digits = 1 + int(log($approx_line_count)/2.30258509299405); # log_10
8814
 
8815
    my $html_out = html_header($filename);
8816
 
8817
    my $comment_line_number = 0;
8818
    for (my $i = 0; $i < scalar @{$rah_diff_R}; $i++) {
8819
        if (defined $rh_blank->{$i}) {
8820
            foreach (1..$rh_blank->{$i}) {
8821
                $html_out .= "<!-- blank -->\n";
8822
            }
8823
        }
8824
        my $line_num = "";
8825
        my $pre      = "";
8826
        my $post     = '</span> &nbsp;';
8827
warn "undef rah_diff_R[$i]{type} " unless defined $rah_diff_R->[$i]{type};
8828
        if ($rah_diff_R->[$i]{type} eq 'nonexist') {
8829
            ++$comment_line_number;
8830
            $line_num = sprintf "\&nbsp; <span class=\"clinenum\"> %0${n_digits}d %s",
8831
                            $comment_line_number, $post;
8832
            $pre = '<span class="comment">';
8833
            $html_out .= $line_num;  
8834
            $html_out .= $pre .  
8835
                         html_metachars($rah_diff_L->[$i]{char}) . 
8836
                         $post . "\n";
8837
            next;
8838
        }
8839
        if      ($rah_diff_R->[$i]{type} eq 'code' and
8840
                 $rah_diff_R->[$i]{desc} eq 'same') {
8841
            # entire line remains as-is
8842
            $line_num = sprintf "\&nbsp; <span class=\"linenum\"> %0${n_digits}d %s",
8843
                            $rah_diff_R->[$i]{lnum}, $post;
8844
            $pre    = '<span class="normal">';
8845
            $html_out .= $line_num;  
8846
            $html_out .= $pre . 
8847
                         html_metachars($rah_diff_R->[$i]{char}) . $post;
8848
#XX     } elsif ($rah_diff_R->[$i]{type} eq 'code') { # code+comments
8849
#XX
8850
#XX         $line_num = '<span class="linenum">' .
8851
#XX                      $rah_diff_R->[$i]{lnum} . $post;
8852
#XX         $html_out .= $line_num;  
8853
#XX
8854
#XX         my @strings = @{$rah_diff_R->[$i]{char}{strings}}; 
8855
#XX         my @type    = @{$rah_diff_R->[$i]{char}{type}}; 
8856
#XX         for (my $i = 0; $i < scalar @strings; $i++) {
8857
#XX             if ($type[$i] eq 'u') {
8858
#XX                 $pre = '<span class="normal">';
8859
#XX             } else {
8860
#XX                 $pre = '<span class="comment">';
8861
#XX             }
8862
#XX             $html_out .= $pre .  html_metachars($strings[$i]) . $post;
8863
#XX         }
8864
# print Dumper(@strings, @type); die;
8865
 
8866
        } elsif ($rah_diff_R->[$i]{type} eq 'comment') {
8867
            $line_num = '<span class="clinenum">' . $comment_line_number . $post;
8868
            # entire line is a comment
8869
            $pre    = '<span class="comment">';
8870
            $html_out .= $pre .
8871
                         html_metachars($rah_diff_R->[$i]{char}) . $post;
8872
        }
8873
#printf "%-30s %s %-30s\n", $line_1, $separator, $line_2;
8874
        $html_out .= "\n";
8875
    }
8876
 
8877
    $html_out .= html_end();
8878
 
8879
    my $out_file = "$filename.html";
8880
    open  OUT, ">$out_file" or die "Cannot write to $out_file $!\n";
8881
    print OUT $html_out;
8882
    close OUT;
8883
    print "Wrote $out_file\n" unless $opt_quiet;
8884
    $OUT->close;
8885
 
8886
    print "<- write_comments_to_html\n" if $opt_v > 2;
8887
} # 1}}}
8888
sub array_diff {                             # {{{1
8889
    my ($file          , # in  only used for error reporting
8890
        $ra_lines_L    , # in  array of lines in Left  file (no blank lines)
8891
        $ra_lines_R    , # in  array of lines in Right file (no blank lines)
8892
        $mode          , # in  "comment" | "revision"
8893
        $rah_diff_L    , # out
8894
        $rah_diff_R    , # out
8895
        $raa_Errors    , # in/out
8896
       ) = @_;
8897
 
8898
    # This routine operates in two ways:
8899
    # A. Computes diffs of the same file with and without comments.
8900
    #    This is used to classify lines as code, comments, or blank.
8901
    # B. Computes diffs of two revisions of a file.  This method
8902
    #    requires a prior run of method A using the older version
8903
    #    of the file because it needs lines to be classified.
8904
 
8905
    # $rah_diff structure:
8906
    # An array with n entries where n equals the number of lines in 
8907
    # an sdiff of the two files.  Each entry in the array describes
8908
    # the contents of the corresponding line in file Left and file Right:
8909
    #  diff[]{type} = blank | code | code+comment | comment | nonexist
8910
    #        {lnum} = line number within the original file (1-based)
8911
    #        {desc} = same | added | removed | modified
8912
    #        {char} = the input line unless {desc} = 'modified' in
8913
    #                 which case
8914
    #        {char}{strings} = [ substrings ]
8915
    #        {char}{type}    = [ disposition (added, removed, etc)]
8916
    #
8917
 
8918
    @{$rah_diff_L} = ();
8919
    @{$rah_diff_R} = ();
8920
 
8921
    print "-> array_diff()\n" if $opt_v > 2;
8922
    my $COMMENT_MODE = 0;
8923
       $COMMENT_MODE = 1 if $mode eq "comment";
8924
 
8925
#print "array_diff(mode=$mode)\n";
8926
#print Dumper("block left:" , $ra_lines_L);
8927
#print Dumper("block right:", $ra_lines_R);
8928
 
8929
    my @sdiffs = ();
8930
    eval {
8931
        local $SIG{ALRM} = sub { die "alarm\n" };
8932
        alarm $opt_diff_timeout;
8933
        @sdiffs = sdiff($ra_lines_L, $ra_lines_R);
8934
        alarm 0;
8935
    };
8936
    if ($@) {
8937
        # timed out
8938
        die unless $@ eq "alarm\n"; # propagate unexpected errors
8939
        push @{$raa_Errors}, 
8940
             [ $Error_Codes{'Diff error, exceeded timeout'}, $file ];
8941
        if ($opt_v) {
8942
          warn "array_diff: diff timeout failure for $file--ignoring\n";
8943
        }
8944
        return;
8945
    }
8946
 
8947
#use Data::Dumper::Simple;
8948
#print Dumper($ra_lines_L, $ra_lines_R, @sdiffs);
8949
#die;
8950
 
8951
    my $n_L        = 0;
8952
    my $n_R        = 0;
8953
    my $n_sdiff    = 0;  # index to $rah_diff_L, $rah_diff_R
8954
    foreach my $triple (@sdiffs) {
8955
        my $flag   = $triple->[0];
8956
        my $line_L = $triple->[1];
8957
        my $line_R = $triple->[2];
8958
        $rah_diff_L->[$n_sdiff]{char} = $line_L;
8959
        $rah_diff_R->[$n_sdiff]{char} = $line_R;
8960
        if      ($flag eq 'u') {  # u = unchanged
8961
            ++$n_L;
8962
            ++$n_R;
8963
            if ($COMMENT_MODE) {
8964
                # line exists in both with & without comments, must be code
8965
                $rah_diff_L->[$n_sdiff]{type} = "code";
8966
                $rah_diff_R->[$n_sdiff]{type} = "code";
8967
            }
8968
            $rah_diff_L->[$n_sdiff]{desc} = "same";
8969
            $rah_diff_R->[$n_sdiff]{desc} = "same";
8970
            $rah_diff_L->[$n_sdiff]{lnum} = $n_L;
8971
            $rah_diff_R->[$n_sdiff]{lnum} = $n_R;
8972
        } elsif ($flag eq 'c') {  # c = changed
8973
# warn "per line sdiff() commented out\n"; if (0) {
8974
            ++$n_L;
8975
            ++$n_R;
8976
 
8977
            if ($COMMENT_MODE) {
8978
                # line has text both with & without comments;
8979
                # count as code
8980
                $rah_diff_L->[$n_sdiff]{type} = "code";
8981
                $rah_diff_R->[$n_sdiff]{type} = "code";
8982
            }
8983
 
8984
            my @chars_L = split '', $line_L;
8985
            my @chars_R = split '', $line_R;
8986
 
8987
#XX         my @inline_sdiffs = sdiff( \@chars_L, \@chars_R );
8988
 
8989
#use Data::Dumper::Simple; 
8990
#if ($n_R == 6 or $n_R == 1 or $n_R == 2) {
8991
#print "L=[$line_L]\n";
8992
#print "R=[$line_R]\n";
8993
#print Dumper(@chars_L, @chars_R, @inline_sdiffs);
8994
#}
8995
#XX         my @index = ();
8996
#XX         foreach my $il_triple (@inline_sdiffs) {
8997
#XX             # make an array of u|c|+|- corresponding
8998
#XX             # to each character
8999
#XX             push @index, $il_triple->[0];
9000
#XX         }
9001
#XX#print Dumper(@index); die;
9002
#XX          # expect problems if arrays @index and $inline_sdiffs[1];
9003
#XX          # (@{$inline_sdiffs->[1]} are the characters of line_L)
9004
#XX          # aren't the same length
9005
#XX          my $prev_type = $index[0];
9006
#XX          my @strings   = ();  # blocks of consecutive code or comment
9007
#XX          my @type      = ();  # u (=code) or c (=comment)
9008
#XX          my $j_str     = 0;
9009
#XX          $strings[$j_str] .= $chars_L[0];
9010
#XX          $type[$j_str] = $prev_type;
9011
#XX          for (my $i = 1; $i < scalar @chars_L; $i++) {
9012
#XX              if ($index[$i] ne $prev_type) {
9013
#XX                  ++$j_str;
9014
#XX#print "change at j_str=$j_str type=$index[$i]\n";
9015
#XX                  $type[$j_str] = $index[$i];
9016
#XX                  $prev_type    = $index[$i];
9017
#XX              }
9018
#XX              $strings[$j_str] .= $chars_L[$i];
9019
#XX          }
9020
# print Dumper(@strings, @type); die;
9021
#XX         delete $rah_diff_R->[$n_sdiff]{char};
9022
#XX         @{$rah_diff_R->[$n_sdiff]{char}{strings}} = @strings;
9023
#XX         @{$rah_diff_R->[$n_sdiff]{char}{type}}    = @type;
9024
            $rah_diff_L->[$n_sdiff]{desc} = "modified";
9025
            $rah_diff_R->[$n_sdiff]{desc} = "modified";
9026
            $rah_diff_L->[$n_sdiff]{lnum} = $n_L;
9027
            $rah_diff_R->[$n_sdiff]{lnum} = $n_R;
9028
#}
9029
 
9030
        } elsif ($flag eq '+') {  # + = added
9031
            ++$n_R;
9032
            if ($COMMENT_MODE) {
9033
                # should never get here
9034
                @{$rah_diff_L} = ();
9035
                @{$rah_diff_R} = ();
9036
                push @{$raa_Errors}, 
9037
                     [ $Error_Codes{'Diff error (quoted comments?)'}, $file ];
9038
                if ($opt_v) {
9039
                  warn "array_diff: diff failure (diff says the\n";
9040
                  warn "comment-free file has added lines).\n";
9041
                  warn "$n_sdiff  $line_L\n";
9042
                }
9043
                last;
9044
            }
9045
            $rah_diff_L->[$n_sdiff]{type} = "nonexist";
9046
            $rah_diff_L->[$n_sdiff]{desc} = "removed";
9047
            $rah_diff_R->[$n_sdiff]{desc} = "added";
9048
            $rah_diff_R->[$n_sdiff]{lnum} = $n_R;
9049
        } elsif ($flag eq '-') {  # - = removed
9050
            ++$n_L;
9051
            if ($COMMENT_MODE) {
9052
                # line must be comment because blanks already gone
9053
                $rah_diff_L->[$n_sdiff]{type} = "comment";
9054
            }
9055
            $rah_diff_R->[$n_sdiff]{type} = "nonexist";
9056
            $rah_diff_R->[$n_sdiff]{desc} = "removed";
9057
            $rah_diff_L->[$n_sdiff]{desc} = "added";
9058
            $rah_diff_L->[$n_sdiff]{lnum} = $n_L;
9059
        }
9060
#printf "%-30s %s %-30s\n", $line_L, $separator, $line_R;
9061
        ++$n_sdiff;
9062
    }
9063
#use Data::Dumper::Simple;
9064
#print Dumper($rah_diff_L, $rah_diff_R);
9065
 
9066
    print "<- array_diff\n" if $opt_v > 2;
9067
} # 1}}}
9068
sub remove_leading_dir {                     # {{{1 
9069
    my @filenames = @_;
9070
    #
9071
    #  Input should be a list of file names
9072
    #  with the same leading directory such as
9073
    # 
9074
    #      dir1/dir2/a.txt
9075
    #      dir1/dir2/b.txt
9076
    #      dir1/dir2/dir3/c.txt
9077
    #
9078
    #  Output is the same list minus the common
9079
    #  directory path:
9080
    # 
9081
    #      a.txt
9082
    #      b.txt
9083
    #      dir3/c.txt
9084
    #
9085
    print "-> remove_leading_dir()\n" if $opt_v > 2;
9086
    my @D = (); # a matrix:   [ [ dir1, dir2 ],         # dir1/dir2/a.txt
9087
                #               [ dir1, dir2 ],         # dir1/dir2/b.txt
9088
                #               [ dir1, dir2 , dir3] ]  # dir1/dir2/dir3/c.txt
9089
    if ($ON_WINDOWS) {
9090
        foreach my $F (@filenames) {
9091
            $F =~ s{\\}{/}g;
9092
            $F = ucfirst($F) if $F =~ /^\w:/;  # uppercase drive letter
9093
        }
9094
    }
9095
    if (scalar @filenames == 1) {
9096
        # special case:  with only one filename
9097
        # cannot determine a baseline, just remove first directory level
9098
        $filenames[0] =~ s{^.*?/}{};
9099
        print "-> $filenames[0]\n";
9100
        return $filenames[0];
9101
    }
9102
    foreach my $F (@filenames) {
9103
        my ($Vol, $Dir, $File) = File::Spec->splitpath($F);
9104
        my @x = File::Spec->splitdir( $Dir );
9105
        pop @x unless $x[$#x]; # last entry usually null, remove it
9106
        if ($ON_WINDOWS) {
9107
            if (defined($Vol) and $Vol) {
9108
                # put the drive letter, eg, C:, at the front
9109
                unshift @x, uc $Vol;
9110
            }
9111
        }
9112
#print "F=$F, Dir=$Dir  x=[", join("][", @x), "]\n";
9113
        push @D, [ @x ];
9114
    }
9115
 
9116
    # now loop over columns until either they are all
9117
    # eliminated or a unique column is found
9118
 
9119
#use Data::Dumper::Simple;
9120
#print Dumper("remove_leading_dir after ", @D);
9121
 
9122
    my @common   = ();  # to contain the common leading directories
9123
    my $mismatch = 0;
9124
    while (!$mismatch) {
9125
        for (my $row = 1; $row < scalar @D; $row++) {
9126
#print "comparing $D[$row][0] to $D[0][0]\n";
9127
 
9128
            if (!defined $D[$row][0] or !defined $D[0][0] or
9129
                ($D[$row][0] ne $D[0][0])) {
9130
                $mismatch = 1;
9131
                last;
9132
            }
9133
        }
9134
#print "mismatch=$mismatch\n";
9135
        if (!$mismatch) {
9136
            push @common, $D[0][0];
9137
            # all terms in the leading match; unshift the batch
9138
            foreach my $ra (@D) {
9139
                shift @{$ra};
9140
            }
9141
        }
9142
    }
9143
 
9144
    push @common, " ";  # so that $leading will end with "/ "
9145
    my $leading = File::Spec->catdir( @common );
9146
       $leading =~ s{ $}{};  # now take back the bogus appended space
9147
#print "remove_leading_dir leading=[$leading]\n"; die;
9148
    if ($ON_WINDOWS) {
9149
       $leading =~ s{\\}{/}g;
9150
    }
9151
    foreach my $F (@filenames) {
9152
        $F =~ s{^$leading}{};
9153
    }
9154
 
9155
    print "<- remove_leading_dir()\n" if $opt_v > 2;
9156
    return @filenames;
9157
 
9158
} # 1}}}
9159
sub strip_leading_dir {                      # {{{1 
9160
    my ($leading, @filenames) = @_;
9161
    #  removes the string $leading from each entry in @filenames
9162
    print "-> strip_leading_dir()\n" if $opt_v > 2;
9163
 
9164
#print "remove_leading_dir leading=[$leading]\n"; die;
9165
    if ($ON_WINDOWS) {
9166
       $leading =~ s{\\}{/}g;
9167
        foreach my $F (@filenames) {
9168
            $F =~ s{\\}{/}g;
9169
        }
9170
    }
9171
    foreach my $F (@filenames) {
9172
        $F =~ s{^$leading}{};
9173
    }
9174
 
9175
    print "<- strip_leading_dir()\n" if $opt_v > 2;
9176
    return @filenames;
9177
 
9178
} # 1}}}
9179
sub find_deepest_file {                      # {{{1 
9180
    my @filenames = @_;
9181
    #
9182
    #  Input should be a list of file names
9183
    #  with the same leading directory such as
9184
    # 
9185
    #      dir1/dir2/a.txt
9186
    #      dir1/dir2/b.txt
9187
    #      dir1/dir2/dir3/c.txt
9188
    #
9189
    #  Output is the file with the most parent directories:
9190
    # 
9191
    #      dir1/dir2/dir3/c.txt
9192
 
9193
    print "-> find_deepest_file()\n" if $opt_v > 2;
9194
 
9195
    my $deepest    = undef;
9196
    my $max_subdir = -1;
9197
    foreach my $F (sort @filenames) {
9198
        my ($Vol, $Dir, $File) = File::Spec->splitpath($F);
9199
        my @x = File::Spec->splitdir( $Dir );
9200
        pop @x unless $x[$#x]; # last entry usually null, remove it
9201
        if (scalar @x > $max_subdir) {
9202
            $deepest    = $F;
9203
            $max_subdir = scalar @x;
9204
        }
9205
    }
9206
 
9207
    print "<- find_deepest_file()\n" if $opt_v > 2;
9208
    return $deepest;
9209
 
9210
} # 1}}}
9211
sub find_uncommon_parent_dir {               # {{{1
9212
    my ($file_L, $file_R) = @_;
9213
    #
9214
    # example:
9215
    #
9216
    #   file_L = "perl-5.16.1/cpan/CPANPLUS/lib/CPANPLUS/Internals/Source/SQLite/Tie.pm"
9217
    #   file_R = "/tmp/8VxQG0OLbp/perl-5.16.3/cpan/CPANPLUS/lib/CPANPLUS/Internals/Source/SQLite/Tie.pm"
9218
    #
9219
    # then return
9220
    #
9221
    #   "perl-5.16.1",
9222
    #   "/tmp/8VxQG0OLbp/perl-5.16.3",
9223
 
9224
    my ($Vol_L, $Dir_L, $File_L) = File::Spec->splitpath($file_L);
9225
    my @x_L = File::Spec->splitdir( $Dir_L );
9226
    my ($Vol_R, $Dir_R, $File_R) = File::Spec->splitpath($file_R);
9227
    my @x_R = File::Spec->splitdir( $Dir_R );
9228
 
9229
    my @common  = ();
9230
 
9231
    # work backwards
9232
    while ($x_L[$#x_L] eq $x_R[$#x_R]) {
9233
        push @common, $x_L[$#x_L];
9234
        pop  @x_L;
9235
        pop  @x_R;
9236
    }
9237
    my $success = scalar @common;
9238
 
9239
    my $dirs_L = File::Spec->catdir( @x_L );
9240
    my $dirs_R = File::Spec->catdir( @x_R );
9241
    my $lead_L = File::Spec->catpath( $Vol_L, $dirs_L, "" );
9242
    my $lead_R = File::Spec->catpath( $Vol_R, $dirs_R, "" );
9243
 
9244
    return $lead_L, $lead_R, $success;
9245
 
9246
} # 1}}}
9247
sub get_leading_dirs {                       # {{{1
9248
    my ($rh_file_list_L, $rh_file_list_R) = @_;
9249
    # find uniquely named files in both sets to help determine the 
9250
    # leading directory positions
9251
    my %unique_filename = ();
9252
    my %basename_L = ();
9253
    my %basename_R = ();
9254
    foreach my $f (keys %{$rh_file_list_L}) {
9255
        my $bn = basename($f);
9256
        $basename_L{ $bn }{'count'}   += 1; 
9257
        $basename_L{ $bn }{'fullpath'} = $f; 
9258
    }
9259
    foreach my $f (keys %{$rh_file_list_R}) {
9260
        my $bn = basename($f);
9261
        $basename_R{ $bn }{'count'}   += 1;
9262
        $basename_R{ $bn }{'fullpath'} = $f; 
9263
    }
9264
    foreach my $f (keys %basename_L) {
9265
        next unless $basename_L{$f}{'count'} == 1;
9266
        next unless defined $basename_R{$f} and $basename_R{$f}{'count'} == 1;
9267
        $unique_filename{$f}{'L'} = $basename_L{ $f }{'fullpath'};
9268
        $unique_filename{$f}{'R'} = $basename_R{ $f }{'fullpath'};
9269
    }
9270
    return undef, undef, 0 unless %unique_filename;
9271
 
9272
    my %candidate_leading_dir_L = ();
9273
    my %candidate_leading_dir_R = ();
9274
    foreach my $f (keys %unique_filename) {
9275
        my $fL = $unique_filename{ $f }{'L'};
9276
        my $fR = $unique_filename{ $f }{'R'};
9277
#printf "%-36s -> %-36s\n", $fL, $fR;
9278
        my $ptr_L = length($fL) - 1;
9279
        my $ptr_R = length($fR) - 1;
9280
        my @aL    = split '', $fL;
9281
        my @aR    = split '', $fR;
9282
        while ($ptr_L >= 0 and $ptr_R >= 0) {
9283
            last if $aL[$ptr_L] ne $aR[$ptr_R];
9284
            --$ptr_L;
9285
            --$ptr_R;
9286
        }
9287
#print "ptr_L=$ptr_L   ptr_R=$ptr_R\n";
9288
        my $leading_dir_L = "";
9289
           $leading_dir_L = substr($fL, 0, $ptr_L+1) if $ptr_L >= 0;
9290
        my $leading_dir_R = "";
9291
           $leading_dir_R = substr($fR, 0, $ptr_R+1) if $ptr_R >= 0;
9292
#print "leading_dir_L=$leading_dir_L   leading_dir_R=$leading_dir_R\n";
9293
        ++$candidate_leading_dir_L{$leading_dir_L};
9294
        ++$candidate_leading_dir_R{$leading_dir_R};
9295
    }
9296
#use Data::Dumper::Simple;
9297
#print Dumper(%candidate_leading_dir_L);
9298
#print Dumper(%candidate_leading_dir_R);
9299
#die;
9300
    my $best_L = (sort {
9301
               $candidate_leading_dir_L{$b} <=> 
9302
               $candidate_leading_dir_L{$a}} keys %candidate_leading_dir_L)[0];
9303
    my $best_R = (sort {
9304
               $candidate_leading_dir_R{$b} <=> 
9305
               $candidate_leading_dir_R{$a}} keys %candidate_leading_dir_R)[0];
9306
    return $best_L, $best_R, 1;
9307
} # 1}}}
9308
sub align_by_pairs {                         # {{{1 
9309
    my ($rh_file_list_L        , # in
9310
        $rh_file_list_R        , # in
9311
        $ra_added              , # out
9312
        $ra_removed            , # out
9313
        $ra_compare_list       , # out
9314
        ) = @_;
9315
    print "-> align_by_pairs()\n" if $opt_v > 2;
9316
    @{$ra_compare_list} = ();
9317
 
9318
    my @files_L = sort keys %{$rh_file_list_L};
9319
    my @files_R = sort keys %{$rh_file_list_R};
9320
    return () unless @files_L or  @files_R;  # at least one must have stuff
9321
    if      ( @files_L and !@files_R) {
9322
        # left side has stuff, right side is empty; everything deleted
9323
        @{$ra_added   }     = ();
9324
        @{$ra_removed }     = @files_L;
9325
        @{$ra_compare_list} = ();
9326
        return;
9327
    } elsif (!@files_L and  @files_R) {
9328
        # left side is empty, right side has stuff; everything added
9329
        @{$ra_added   }     = @files_R;
9330
        @{$ra_removed }     = ();
9331
        @{$ra_compare_list} = ();
9332
        return;
9333
    }
9334
#use Data::Dumper::Simple;
9335
#print Dumper("align_by_pairs", %{$rh_file_list_L}, %{$rh_file_list_R},);
9336
#die;
9337
    if (scalar @files_L == 1 and scalar @files_R == 1) {
9338
        # The easy case:  compare two files.
9339
        push @{$ra_compare_list}, [ $files_L[0],  $files_R[0] ]; 
9340
        @{$ra_added  } = ();
9341
        @{$ra_removed} = ();
9342
        return;
9343
    }
9344
    # The harder case:  compare groups of files.  This only works
9345
    # if the groups are in different directories so the first step
9346
    # is to strip the leading directory names from file lists to
9347
    # make it possible to align by file names.
9348
    my @files_L_minus_dir = undef;
9349
    my @files_R_minus_dir = undef;
9350
 
9351
    my $deepest_file_L    = find_deepest_file(@files_L);
9352
    my $deepest_file_R    = find_deepest_file(@files_R);
9353
#print "deepest L = [$deepest_file_L]\n";
9354
#print "deepest R = [$deepest_file_R]\n";
9355
####my ($leading_dir_L, $leading_dir_R, $success) = 
9356
####    find_uncommon_parent_dir($deepest_file_L, $deepest_file_R);
9357
    my ($leading_dir_L, $leading_dir_R, $success) = 
9358
                get_leading_dirs($rh_file_list_L, $rh_file_list_R);
9359
#print "leading_dir_L=[$leading_dir_L]\n";
9360
#print "leading_dir_R=[$leading_dir_R]\n";
9361
#print "success      =[$success]\n";
9362
    if ($success) {
9363
        @files_L_minus_dir = strip_leading_dir($leading_dir_L, @files_L);
9364
        @files_R_minus_dir = strip_leading_dir($leading_dir_R, @files_R);
9365
    } else {
9366
        # otherwise fall back to old strategy
9367
        @files_L_minus_dir = remove_leading_dir(@files_L);
9368
        @files_R_minus_dir = remove_leading_dir(@files_R);
9369
    }
9370
 
9371
    # Keys of the stripped_X arrays are canonical file names;
9372
    # should overlap mostly.  Keys in stripped_L but not in
9373
    # stripped_R are files that have been deleted.  Keys in
9374
    # stripped_R but not in stripped_L have been added.
9375
    my %stripped_L = ();
9376
       @stripped_L{ @files_L_minus_dir } = @files_L;
9377
    my %stripped_R = ();
9378
       @stripped_R{ @files_R_minus_dir } = @files_R;
9379
 
9380
    my %common = ();
9381
    foreach my $f (keys %stripped_L) {
9382
        $common{$f}  = 1 if     defined $stripped_R{$f};
9383
    }
9384
 
9385
    my %deleted = ();
9386
    foreach my $f (keys %stripped_L) {
9387
        $deleted{$stripped_L{$f}} = $f unless defined $stripped_R{$f};
9388
    }
9389
 
9390
    my %added = ();
9391
    foreach my $f (keys %stripped_R) {
9392
        $added{$stripped_R{$f}}   = $f unless defined $stripped_L{$f};
9393
    }
9394
 
9395
#use Data::Dumper::Simple;
9396
#print Dumper("align_by_pairs", %stripped_L, %stripped_R);
9397
#print Dumper("align_by_pairs", %common, %added, %deleted);
9398
 
9399
    foreach my $f (keys %common) {
9400
        push @{$ra_compare_list}, [ $stripped_L{$f},  
9401
                                    $stripped_R{$f} ]; 
9402
    }
9403
    @{$ra_added   } = keys %added  ;
9404
    @{$ra_removed } = keys %deleted;
9405
 
9406
    print "<- align_by_pairs()\n" if $opt_v > 2;
9407
    return;
9408
#print Dumper("align_by_pairs", @files_L_minus_dir, @files_R_minus_dir);
9409
#die;
9410
} # 1}}}
9411
sub html_header {                            # {{{1
9412
    my ($title , ) = @_;
9413
 
9414
    print "-> html_header\n" if $opt_v > 2;
9415
    return 
9416
'<html>
9417
<head>
9418
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
9419
<meta name="GENERATOR" content="cloc http://cloc.sourceforge.net">
9420
' .
9421
"
9422
<!-- Created by $script v$VERSION -->
9423
<title>$title</title>
9424
" .
9425
'
9426
<style TYPE="text/css">
9427
<!--
9428
    body {
9429
        color: black;
9430
        background-color: white;
9431
        font-family: monospace
9432
    }
9433
 
9434
    .whitespace {
9435
        background-color: gray;
9436
    }
9437
 
9438
    .comment {
9439
        color: gray;
9440
        font-style: italic;
9441
    }
9442
 
9443
    .clinenum {
9444
        color: red;
9445
    }
9446
 
9447
    .linenum {
9448
        color: green;
9449
    }
9450
 -->
9451
</style>
9452
</head>
9453
<body>
9454
<pre><tt>
9455
';
9456
    print "<- html_header\n" if $opt_v > 2;
9457
} # 1}}}
9458
sub html_end {                               # {{{1
9459
return 
9460
'</tt></pre>
9461
</body>
9462
</html>
9463
';
9464
} # 1}}}
9465
sub die_unknown_lang {                       # {{{1
9466
    my ($lang, $option_name) = @_;
9467
    die "Unknown language '$lang' used with $option_name option.  " .
9468
        "The command\n  $script --show-lang\n" .
9469
        "will print all recognized languages.  Language names are " .
9470
        "case sensitive.\n" ;
9471
} # 1}}}
9472
sub unicode_file {                           # {{{1
9473
    my $file = shift @_; 
9474
 
9475
    print "-> unicode_file($file)\n" if $opt_v > 2;
9476
    return 0 if (-s $file > 2_000_000);  
9477
    # don't bother trying to test binary files bigger than 2 MB
9478
 
9479
    my $IN = new IO::File $file, "r";
9480
    if (!defined $IN) {
9481
        warn "Unable to read $file; ignoring.\n";
9482
        return 0;
9483
    }
9484
    my @lines = <$IN>;
9485
    $IN->close;
9486
 
9487
    if (unicode_to_ascii( join('', @lines) )) {
9488
        print "<- unicode_file()\n" if $opt_v > 2;
9489
        return 1;
9490
    } else {
9491
        print "<- unicode_file()\n" if $opt_v > 2;
9492
        return 0;
9493
    }
9494
 
9495
} # 1}}}
9496
sub unicode_to_ascii {                       # {{{1
9497
    my $string = shift @_; 
9498
 
9499
    # A trivial attempt to convert UTF-16 little or big endian
9500
    # files into ASCII.  These files exhibit the following byte
9501
    # sequence:
9502
    #   byte   1:  255
9503
    #   byte   2:  254
9504
    #   byte   3:  ord of ASCII character
9505
    #   byte   4:    0
9506
    #   byte 3+i:  ord of ASCII character
9507
    #   byte 4+i:    0
9508
    # or
9509
    #   byte   1:  255
9510
    #   byte   2:  254
9511
    #   byte   3:    0
9512
    #   byte   4:  ord of ASCII character
9513
    #   byte 3+i:    0
9514
    #   byte 4+i:  ord of ASCII character
9515
 
9516
    my $length  = length $string;
9517
#print "length=$length\n";
9518
    return '' if $length <= 3;
9519
    my @unicode = split(//, $string);
9520
 
9521
    # check the first 100 characters for big or little endian UTF-16 encoding
9522
    my $max_peek = $length < 200 ? $length : 200;
9523
    my @view_1   = ();
9524
    for (my $i = 2; $i < $max_peek; $i += 2) { push @view_1, $unicode[$i] }
9525
    my @view_2   = ();
9526
    for (my $i = 3; $i < $max_peek; $i += 2) { push @view_2, $unicode[$i] }
9527
 
9528
    my $points_1 = 0;
9529
    foreach my $C (@view_1) {
9530
        ++$points_1 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13
9531
                                                          or ord($C) == 10
9532
                                                          or ord($C) ==  9;
9533
    }
9534
 
9535
    my $points_2 = 0;
9536
    foreach my $C (@view_2) {
9537
        ++$points_2 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13
9538
                                                          or ord($C) == 10
9539
                                                          or ord($C) ==  9;
9540
    }
9541
#print "points 1: $points_1\n";
9542
#print "points 2: $points_2\n";
9543
 
9544
    my $offset = undef;
9545
    if    ($points_1 > 90) { $offset = 2; }
9546
    elsif ($points_2 > 90) { $offset = 3; }
9547
    else                   { return '' }  # neither big or little endian UTF-16
9548
 
9549
    my @ascii              = ();
9550
    for (my $i = $offset; $i < $length; $i += 2) { push @ascii, $unicode[$i]; }
9551
    return join("", @ascii);
9552
} # 1}}}
9553
sub uncompress_archive_cmd {                 # {{{1
9554
    my ($archive_file, ) = @_;
9555
 
9556
    # Wrap $archive_file in single or double quotes in the system
9557
    # commands below to avoid filename chicanery (including
9558
    # spaces in the names).
9559
 
9560
    print "-> uncompress_archive_cmd($archive_file)\n" if $opt_v > 2;
9561
    my $extract_cmd = "";
9562
    my $missing     = "";
9563
    if ($opt_extract_with) {
9564
        ( $extract_cmd = $opt_extract_with ) =~ s/>FILE</$archive_file/g;
9565
    } elsif (basename($archive_file) eq "-" and !$ON_WINDOWS) {
9566
        $extract_cmd = "cat > -";
9567
    } elsif (($archive_file =~ /\.tar\.(gz|Z)$/ or 
9568
              $archive_file =~ /\.tgz$/       ) and !$ON_WINDOWS)    {
9569
        if (external_utility_exists("gzip --version")) {
9570
            if (external_utility_exists("tar --version")) {
9571
                $extract_cmd = "gzip -dc '$archive_file' | tar xf -";
9572
            } else {
9573
                $missing = "tar";
9574
            }
9575
        } else {
9576
            $missing = "gzip";
9577
        }
9578
    } elsif ($archive_file =~ /\.tar\.bz2$/ and !$ON_WINDOWS)    {
9579
        if (external_utility_exists("bzip2 --help")) {
9580
            if (external_utility_exists("tar --version")) {
9581
                $extract_cmd = "bzip2 -dc '$archive_file' | tar xf -";
9582
            } else {
9583
                $missing = "tar";
9584
            }
9585
        } else {
9586
            $missing = "bzip2";
9587
        }
9588
    } elsif ($archive_file =~ /\.tar\.xz$/ and !$ON_WINDOWS)    {
9589
        if (external_utility_exists("unxz --version")) {
9590
            if (external_utility_exists("tar --version")) {
9591
                $extract_cmd = "unxz -dc '$archive_file' | tar xf -";
9592
            } else {
9593
                $missing = "tar";
9594
            }
9595
        } else {
9596
            $missing = "bzip2";
9597
        }
9598
    } elsif ($archive_file =~ /\.tar$/ and !$ON_WINDOWS)    {
9599
        $extract_cmd = "tar xf '$archive_file'";
9600
    } elsif ($archive_file =~ /\.src\.rpm$/i and !$ON_WINDOWS) {
9601
        if (external_utility_exists("cpio --version")) {
9602
            if (external_utility_exists("rpm2cpio")) {
9603
                $extract_cmd = "rpm2cpio '$archive_file' | cpio -i";
9604
            } else {
9605
                $missing = "rpm2cpio";
9606
            }
9607
        } else {
9608
            $missing = "bzip2";
9609
        }
9610
    } elsif ($archive_file =~ /\.zip$/i and !$ON_WINDOWS)    {
9611
        if (external_utility_exists("unzip")) {
9612
            $extract_cmd = "unzip -qq -d . '$archive_file'";
9613
        } else {
9614
            $missing = "unzip";
9615
        }
9616
    } elsif ($ON_WINDOWS and $archive_file =~ /\.zip$/i) {
9617
        # zip on Windows, guess default Winzip install location
9618
        $extract_cmd = "";
9619
        my $WinZip = '"C:\\Program Files\\WinZip\\WinZip32.exe"';
9620
        if (external_utility_exists($WinZip)) {
9621
            $extract_cmd = "$WinZip -e -o \"$archive_file\" .";
9622
#print "trace 5 extract_cmd=[$extract_cmd]\n";
9623
        } else {
9624
#print "trace 6\n";
9625
            $missing = $WinZip;
9626
        }
9627
    }
9628
    print "<- uncompress_archive_cmd\n" if $opt_v > 2;
9629
    if ($missing) {
9630
        die "Unable to expand $archive_file because external\n",
9631
            "utility '$missing' is not available.\n",
9632
            "Another possibility is to use the --extract-with option.\n";
9633
    } else {
9634
        return $extract_cmd;
9635
    }
9636
}
9637
# 1}}}
9638
sub read_list_file {                         # {{{1
9639
    my ($file, ) = @_;
9640
 
9641
    print "-> read_list_file($file)\n" if $opt_v > 2;
9642
    my $IN = new IO::File $file, "r";
9643
    if (!defined $IN) {
9644
        warn "Unable to read $file; ignoring.\n";
9645
        next;
9646
    }
9647
    my @entry = ();
9648
    while (<$IN>) {
9649
        next if /^\s*$/ or /^\s*#/; # skip empty or commented lines
9650
        s/\cM$//;  # DOS to Unix
9651
        chomp;
9652
        push @entry, $_;
9653
    }
9654
    $IN->close;
9655
 
9656
    print "<- read_list_file\n" if $opt_v > 2;
9657
    return @entry;
9658
}
9659
# 1}}}
9660
sub external_utility_exists {                # {{{1
9661
    my $exe = shift @_;
9662
 
9663
    my $success      = 0;
9664
    if ($ON_WINDOWS) {
9665
        $success = 1 unless system $exe . ' > nul';
9666
    } else {
9667
        $success = 1 unless system $exe . ' >/dev/null 2>&1';
9668
        if (!$success) {
9669
            $success = 1 unless system "which" . " $exe" . ' >/dev/null 2>&1';
9670
        }
9671
    }
9672
 
9673
    return $success;
9674
} # 1}}}
9675
sub write_xsl_file {                         # {{{1
9676
    my $OUT = new IO::File $CLOC_XSL, "w";
9677
    if (!defined $OUT) {
9678
        warn "Unable to write $CLOC_XSL  $!\n";
9679
        return;
9680
    }
9681
    my $XSL =             # <style>  </style> {{{2
9682
'<?xml version="1.0" encoding="US-ASCII"?>
9683
<!-- XLS file by Paul Schwann, January 2009.
9684
     Fixes for by-file and by-file-by-lang by d_uragan, November 2010.
9685
     -->
9686
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
9687
  <xsl:output method="html"/>
9688
  <xsl:template match="/">
9689
    <html xmlns="http://www.w3.org/1999/xhtml">
9690
      <head>
9691
        <title>CLOC Results</title>
9692
      </head>
9693
      <style type="text/css">
9694
        table {
9695
          table-layout: auto;
9696
          border-collapse: collapse;
9697
          empty-cells: show;
9698
        }
9699
        td, th {
9700
          padding: 4px;
9701
        }
9702
        th {
9703
          background-color: #CCCCCC;
9704
        }
9705
        td {
9706
          text-align: center;
9707
        }
9708
        table, td, tr, th {
9709
          border: thin solid #999999;
9710
        }
9711
      </style>
9712
      <body>
9713
        <h3><xsl:value-of select="results/header"/></h3>
9714
';
9715
# 2}}}
9716
 
9717
    if ($opt_by_file) {
9718
        $XSL .=             # <table> </table>{{{2
9719
'        <table>
9720
          <thead>
9721
            <tr>
9722
              <th>File</th>
9723
              <th>Blank</th>
9724
              <th>Comment</th>
9725
              <th>Code</th>
9726
              <th>Language</th>
9727
';
9728
        $XSL .=
9729
'             <th>3<sup>rd</sup> Generation Equivalent</th>
9730
              <th>Scale</th>
9731
' if $opt_3;
9732
        $XSL .=
9733
'           </tr>
9734
          </thead>
9735
          <tbody>
9736
          <xsl:for-each select="results/files/file">
9737
            <tr>
9738
              <th><xsl:value-of select="@name"/></th>
9739
              <td><xsl:value-of select="@blank"/></td>
9740
              <td><xsl:value-of select="@comment"/></td>
9741
              <td><xsl:value-of select="@code"/></td>
9742
              <td><xsl:value-of select="@language"/></td>
9743
';
9744
        $XSL .=
9745
'             <td><xsl:value-of select="@factor"/></td>
9746
              <td><xsl:value-of select="@scaled"/></td>
9747
' if $opt_3;
9748
        $XSL .=
9749
'           </tr>
9750
          </xsl:for-each>
9751
            <tr>
9752
              <th>Total</th>
9753
              <th><xsl:value-of select="results/files/total/@blank"/></th>
9754
              <th><xsl:value-of select="results/files/total/@comment"/></th>
9755
              <th><xsl:value-of select="results/files/total/@code"/></th>
9756
              <th><xsl:value-of select="results/files/total/@language"/></th>
9757
';
9758
        $XSL .=
9759
'             <th><xsl:value-of select="results/files/total/@factor"/></th>
9760
              <th><xsl:value-of select="results/files/total/@scaled"/></th>
9761
' if $opt_3;
9762
        $XSL .=
9763
'           </tr>
9764
          </tbody>
9765
        </table>
9766
        <br/>
9767
';
9768
# 2}}}
9769
    }
9770
 
9771
    if (!$opt_by_file or $opt_by_file_by_lang) {
9772
        $XSL .=             # <table> </table> {{{2
9773
'       <table>
9774
          <thead>
9775
            <tr>
9776
              <th>Language</th>
9777
              <th>Files</th>
9778
              <th>Blank</th>
9779
              <th>Comment</th>
9780
              <th>Code</th>
9781
';
9782
        $XSL .=
9783
'             <th>Scale</th>
9784
              <th>3<sup>rd</sup> Generation Equivalent</th>
9785
' if $opt_3;
9786
        $XSL .=
9787
'           </tr>
9788
          </thead>
9789
          <tbody>
9790
          <xsl:for-each select="results/languages/language">
9791
            <tr>
9792
              <th><xsl:value-of select="@name"/></th>
9793
              <td><xsl:value-of select="@files_count"/></td>
9794
              <td><xsl:value-of select="@blank"/></td>
9795
              <td><xsl:value-of select="@comment"/></td>
9796
              <td><xsl:value-of select="@code"/></td>
9797
';
9798
        $XSL .=
9799
'             <td><xsl:value-of select="@factor"/></td>
9800
              <td><xsl:value-of select="@scaled"/></td>
9801
' if $opt_3;
9802
        $XSL .=
9803
'          </tr>
9804
          </xsl:for-each>
9805
            <tr>
9806
              <th>Total</th>
9807
              <th><xsl:value-of select="results/languages/total/@sum_files"/></th>
9808
              <th><xsl:value-of select="results/languages/total/@blank"/></th>
9809
              <th><xsl:value-of select="results/languages/total/@comment"/></th>
9810
              <th><xsl:value-of select="results/languages/total/@code"/></th>
9811
';
9812
        $XSL .=
9813
'             <th><xsl:value-of select="results/languages/total/@factor"/></th>
9814
              <th><xsl:value-of select="results/languages/total/@scaled"/></th>
9815
' if $opt_3;
9816
        $XSL .=
9817
'           </tr>
9818
          </tbody>
9819
        </table>
9820
';
9821
# 2}}}
9822
    }
9823
 
9824
    $XSL.= <<'EO_XSL'; # {{{2
9825
      </body>
9826
    </html>
9827
  </xsl:template>
9828
</xsl:stylesheet>
9829
 
9830
EO_XSL
9831
# 2}}}
9832
 
9833
    my $XSL_DIFF = <<'EO_DIFF_XSL'; # {{{2
9834
<?xml version="1.0" encoding="US-ASCII"?>
9835
<!-- XLS file by Blazej Kroll, November 2010 -->
9836
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
9837
  <xsl:output method="html"/>
9838
  <xsl:template match="/">
9839
    <html xmlns="http://www.w3.org/1999/xhtml">
9840
      <head>
9841
        <title>CLOC Results</title>
9842
      </head>
9843
      <style type="text/css">
9844
        table {
9845
          table-layout: auto;
9846
          border-collapse: collapse;
9847
          empty-cells: show;
9848
          margin: 1em;
9849
        }
9850
        td, th {
9851
          padding: 4px;
9852
        }
9853
        th {
9854
          background-color: #CCCCCC;
9855
        }
9856
        td {
9857
          text-align: center;
9858
        }
9859
        table, td, tr, th {
9860
          border: thin solid #999999;
9861
        }
9862
      </style>
9863
      <body>
9864
        <h3><xsl:value-of select="results/header"/></h3>
9865
EO_DIFF_XSL
9866
# 2}}}
9867
 
9868
    if ($opt_by_file) {
9869
        $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
9870
        <table>
9871
          <thead>
9872
          <tr><th colspan="4">Same</th>
9873
          </tr>
9874
            <tr>
9875
              <th>File</th>
9876
              <th>Blank</th>
9877
              <th>Comment</th>
9878
              <th>Code</th>              
9879
            </tr>
9880
          </thead>
9881
          <tbody>
9882
          <xsl:for-each select="diff_results/same/file">
9883
            <tr>
9884
              <th><xsl:value-of select="@name"/></th>
9885
              <td><xsl:value-of select="@blank"/></td>
9886
              <td><xsl:value-of select="@comment"/></td>
9887
              <td><xsl:value-of select="@code"/></td>              
9888
            </tr>
9889
          </xsl:for-each>            
9890
          </tbody>
9891
        </table>
9892
 
9893
        <table>
9894
          <thead>
9895
          <tr><th colspan="4">Modified</th>
9896
          </tr>
9897
            <tr>
9898
              <th>File</th>
9899
              <th>Blank</th>
9900
              <th>Comment</th>
9901
              <th>Code</th>              
9902
            </tr>
9903
          </thead>
9904
          <tbody>
9905
          <xsl:for-each select="diff_results/modified/file">
9906
            <tr>
9907
              <th><xsl:value-of select="@name"/></th>
9908
              <td><xsl:value-of select="@blank"/></td>
9909
              <td><xsl:value-of select="@comment"/></td>
9910
              <td><xsl:value-of select="@code"/></td>              
9911
            </tr>
9912
          </xsl:for-each>            
9913
          </tbody>
9914
        </table>
9915
 
9916
        <table>
9917
          <thead>
9918
          <tr><th colspan="4">Added</th>
9919
          </tr>
9920
            <tr>
9921
              <th>File</th>
9922
              <th>Blank</th>
9923
              <th>Comment</th>
9924
              <th>Code</th>              
9925
            </tr>
9926
          </thead>
9927
          <tbody>
9928
          <xsl:for-each select="diff_results/added/file">
9929
            <tr>
9930
              <th><xsl:value-of select="@name"/></th>
9931
              <td><xsl:value-of select="@blank"/></td>
9932
              <td><xsl:value-of select="@comment"/></td>
9933
              <td><xsl:value-of select="@code"/></td>              
9934
            </tr>
9935
          </xsl:for-each>            
9936
          </tbody>
9937
        </table>
9938
 
9939
        <table>
9940
          <thead>
9941
          <tr><th colspan="4">Removed</th>
9942
          </tr>
9943
            <tr>
9944
              <th>File</th>
9945
              <th>Blank</th>
9946
              <th>Comment</th>
9947
              <th>Code</th>              
9948
            </tr>
9949
          </thead>
9950
          <tbody>
9951
          <xsl:for-each select="diff_results/removed/file">
9952
            <tr>
9953
              <th><xsl:value-of select="@name"/></th>
9954
              <td><xsl:value-of select="@blank"/></td>
9955
              <td><xsl:value-of select="@comment"/></td>
9956
              <td><xsl:value-of select="@code"/></td>              
9957
            </tr>
9958
          </xsl:for-each>            
9959
          </tbody>
9960
        </table>
9961
EO_DIFF_XSL
9962
# 2}}}
9963
    }
9964
 
9965
    if (!$opt_by_file or $opt_by_file_by_lang) {
9966
        $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
9967
        <table>
9968
          <thead>
9969
          <tr><th colspan="5">Same</th>
9970
          </tr>
9971
            <tr>
9972
              <th>Language</th>
9973
              <th>Files</th>
9974
              <th>Blank</th>
9975
              <th>Comment</th>
9976
              <th>Code</th>              
9977
            </tr>
9978
          </thead>
9979
          <tbody>
9980
          <xsl:for-each select="diff_results/same/language">
9981
            <tr>
9982
              <th><xsl:value-of select="@name"/></th>
9983
              <td><xsl:value-of select="@files_count"/></td>
9984
              <td><xsl:value-of select="@blank"/></td>
9985
              <td><xsl:value-of select="@comment"/></td>
9986
              <td><xsl:value-of select="@code"/></td>              
9987
            </tr>
9988
          </xsl:for-each>            
9989
          </tbody>
9990
        </table>
9991
 
9992
        <table>
9993
          <thead>
9994
          <tr><th colspan="5">Modified</th>
9995
          </tr>
9996
            <tr>
9997
              <th>Language</th>
9998
              <th>Files</th>
9999
              <th>Blank</th>
10000
              <th>Comment</th>
10001
              <th>Code</th>              
10002
            </tr>
10003
          </thead>
10004
          <tbody>
10005
          <xsl:for-each select="diff_results/modified/language">
10006
            <tr>
10007
              <th><xsl:value-of select="@name"/></th>
10008
              <td><xsl:value-of select="@files_count"/></td>
10009
              <td><xsl:value-of select="@blank"/></td>
10010
              <td><xsl:value-of select="@comment"/></td>
10011
              <td><xsl:value-of select="@code"/></td>              
10012
            </tr>
10013
          </xsl:for-each>            
10014
          </tbody>
10015
        </table>
10016
 
10017
        <table>
10018
          <thead>
10019
          <tr><th colspan="5">Added</th>
10020
          </tr>
10021
            <tr>
10022
              <th>Language</th>
10023
              <th>Files</th>
10024
              <th>Blank</th>
10025
              <th>Comment</th>
10026
              <th>Code</th>              
10027
            </tr>
10028
          </thead>
10029
          <tbody>
10030
          <xsl:for-each select="diff_results/added/language">
10031
            <tr>
10032
              <th><xsl:value-of select="@name"/></th>
10033
              <td><xsl:value-of select="@files_count"/></td>
10034
              <td><xsl:value-of select="@blank"/></td>
10035
              <td><xsl:value-of select="@comment"/></td>
10036
              <td><xsl:value-of select="@code"/></td>              
10037
            </tr>
10038
          </xsl:for-each>            
10039
          </tbody>
10040
        </table>
10041
 
10042
        <table>
10043
          <thead>
10044
          <tr><th colspan="5">Removed</th>
10045
          </tr>
10046
            <tr>
10047
              <th>Language</th>
10048
              <th>Files</th>
10049
              <th>Blank</th>
10050
              <th>Comment</th>
10051
              <th>Code</th>              
10052
            </tr>
10053
          </thead>
10054
          <tbody>
10055
          <xsl:for-each select="diff_results/removed/language">
10056
            <tr>
10057
              <th><xsl:value-of select="@name"/></th>
10058
              <td><xsl:value-of select="@files_count"/></td>
10059
              <td><xsl:value-of select="@blank"/></td>
10060
              <td><xsl:value-of select="@comment"/></td>
10061
              <td><xsl:value-of select="@code"/></td>              
10062
            </tr>
10063
          </xsl:for-each>            
10064
          </tbody>
10065
        </table>
10066
EO_DIFF_XSL
10067
# 2}}}
10068
 
10069
    }
10070
 
10071
    $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
10072
      </body>
10073
    </html>
10074
  </xsl:template>
10075
</xsl:stylesheet>
10076
EO_DIFF_XSL
10077
# 2}}}
10078
    if ($opt_diff) {
10079
        print $OUT $XSL_DIFF;
10080
    } else {
10081
        print $OUT $XSL;
10082
    }
10083
    $OUT->close();
10084
} # 1}}}
10085
sub normalize_file_names {                   # {{{1 
10086
    my (@files, ) = @_;
10087
 
10088
    # Returns a hash of file names reduced to a canonical form
10089
    # (fully qualified file names, all path separators changed to /,
10090
    # Windows file names lowercased).  Hash values are the original
10091
    # file name.
10092
 
10093
    my %normalized = ();
10094
    foreach my $F (@files) {
10095
        my $F_norm = $F;
10096
        if ($ON_WINDOWS) {
10097
            $F_norm = lc $F_norm; # for case insensitive file name comparisons
10098
            $F_norm =~ s{\\}{/}g; # Windows directory separators to Unix
10099
            $F_norm =~ s{^\./}{}g;  # remove leading ./
10100
            if (($F_norm !~ m{^/}) and ($F_norm !~ m{^\w:/})) {
10101
                # looks like a relative path; prefix with cwd
10102
                $F_norm = lc "$cwd/$F_norm";
10103
            }
10104
        } else {
10105
            $F_norm =~ s{^\./}{}g;  # remove leading ./
10106
            if ($F_norm !~ m{^/}) {
10107
                # looks like a relative path; prefix with cwd
10108
                $F_norm = lc "$cwd/$F_norm";
10109
            }
10110
        }
10111
        $normalized{ $F_norm } = $F;
10112
    }
10113
    return %normalized;
10114
} # 1}}}
10115
sub combine_diffs {                          # {{{1
10116
    # subroutine by Andy (awalshe@sf.net)
10117
    # https://sourceforge.net/tracker/?func=detail&aid=3261017&group_id=174787&atid=870625
10118
    my ($ra_files) = @_;
10119
 
10120
    my $res   = "$URL v $VERSION\n";
10121
    my $dl    = '-';
10122
    my $width = 79;
10123
    # columns are in this order
10124
    my @cols  = ('files', 'blank', 'comment', 'code');
10125
    my %HoH   = ();
10126
 
10127
    foreach my $file (@{$ra_files}) {
10128
        my $IN = new IO::File $file, "r";
10129
        if (!defined $IN) {
10130
            warn "Unable to read $file; ignoring.\n";
10131
            next;
10132
        }
10133
 
10134
        my $sec;
10135
        while (<$IN>) {
10136
            chomp;
10137
            s/\cM$//;
10138
            next if /^(http|Language|-----)/;
10139
            if (/^[A-Za-z0-9]+/) {        # section title
10140
                $sec = $_;
10141
                chomp($sec);
10142
                $HoH{$sec} = () if ! exists $HoH{$sec};
10143
                next;
10144
            }
10145
 
10146
            if (/^\s(same|modified|added|removed)/) {  # calculated totals row
10147
                my @ar = grep { $_ ne '' } split(/ /, $_);
10148
                chomp(@ar);
10149
                my $ttl = shift @ar;
10150
                my $i = 0;
10151
                foreach(@ar) {
10152
                    my $t = "${ttl}${dl}${cols[$i]}";
10153
                    $HoH{$sec}{$t} = 0 if ! exists $HoH{$sec}{$t};
10154
                    $HoH{$sec}{$t} += $_;
10155
                    $i++;
10156
                }
10157
            }
10158
        }
10159
        $IN->close;
10160
    }
10161
 
10162
    # rows are in this order
10163
    my @rows = ('same', 'modified', 'added', 'removed');
10164
 
10165
    $res .= sprintf("%s\n", "-" x $width);
10166
    $res .= sprintf("%-19s %14s %14s %14s %14s\n", 'Language', 
10167
                    $cols[0], $cols[1], $cols[2], $cols[3]);
10168
    $res .= sprintf("%s\n", "-" x $width);
10169
 
10170
    for my $sec ( keys %HoH ) {
10171
        next if $sec =~ /SUM:/;
10172
        $res .= "$sec\n";
10173
        foreach (@rows) {
10174
            $res .= sprintf(" %-18s %14s %14s %14s %14s\n", 
10175
                            $_, $HoH{$sec}{"${_}${dl}${cols[0]}"},
10176
                                $HoH{$sec}{"${_}${dl}${cols[1]}"},
10177
                                $HoH{$sec}{"${_}${dl}${cols[2]}"},
10178
                                $HoH{$sec}{"${_}${dl}${cols[3]}"});
10179
        }
10180
    }
10181
    $res .= sprintf("%s\n", "-" x $width);
10182
    my $sec = 'SUM:';
10183
    $res .= "$sec\n";
10184
    foreach (@rows) {
10185
        $res .= sprintf(" %-18s %14s %14s %14s %14s\n", 
10186
                        $_, $HoH{$sec}{"${_}${dl}${cols[0]}"},
10187
                            $HoH{$sec}{"${_}${dl}${cols[1]}"},
10188
                            $HoH{$sec}{"${_}${dl}${cols[2]}"},
10189
                            $HoH{$sec}{"${_}${dl}${cols[3]}"});
10190
    }
10191
    $res .= sprintf("%s\n", "-" x $width);
10192
 
10193
    return $res;
10194
} # 1}}}
10195
sub get_time {                               # {{{1
10196
    if ($HAVE_Time_HiRes) {
10197
        return Time::HiRes::time();
10198
    } else {
10199
        return time();
10200
    }
10201
} # 1}}}
10202
sub really_is_D {                            # {{{1
10203
    # Ref bug 131, files ending with .d could be init.d scripts
10204
    # instead of D language source files.
10205
    my ($file        , # in
10206
        $rh_Err      , # in   hash of error codes
10207
        $raa_errors  , # out
10208
       ) = @_;
10209
    print "-> really_is_D($file)\n" if $opt_v > 2;
10210
    my $possible_script = peek_at_first_line($file, $rh_Err, $raa_errors);
10211
 
10212
    print "<- really_is_D($file)\n" if $opt_v > 2;
10213
    return $possible_script;    # null string if D, otherwise a language
10214
} # 1}}}
10215
# subroutines copied from SLOCCount
10216
my %lex_files    = ();  # really_is_lex()
10217
my %expect_files = ();  # really_is_expect()
10218
my %php_files    = ();  # really_is_php()
10219
sub really_is_lex {                          # {{{1
10220
# Given filename, returns TRUE if its contents really is lex.
10221
# lex file must have "%%", "%{", and "%}".
10222
# In theory, a lex file doesn't need "%{" and "%}", but in practice
10223
# they all have them, and requiring them avoid mislabeling a
10224
# non-lexfile as a lex file.
10225
 
10226
 my $filename = shift;
10227
 chomp($filename);
10228
 
10229
 my $is_lex = 0;      # Value to determine.
10230
 my $percent_percent = 0;
10231
 my $percent_opencurly = 0;
10232
 my $percent_closecurly = 0;
10233
 
10234
 # Return cached result, if available:
10235
 if ($lex_files{$filename}) { return $lex_files{$filename};}
10236
 
10237
 open(LEX_FILE, "<$filename") ||
10238
      die "Can't open $filename to determine if it's lex.\n";
10239
 while(<LEX_FILE>) {
10240
   $percent_percent++     if (m/^\s*\%\%/);
10241
   $percent_opencurly++   if (m/^\s*\%\{/);
10242
   $percent_closecurly++   if (m/^\s*\%\}/);
10243
 }
10244
 close(LEX_FILE);
10245
 
10246
 if ($percent_percent && $percent_opencurly && $percent_closecurly)
10247
          {$is_lex = 1;}
10248
 
10249
 $lex_files{$filename} = $is_lex; # Store result in cache.
10250
 
10251
 return $is_lex;
10252
} # 1}}}
10253
sub really_is_expect {                       # {{{1
10254
# Given filename, returns TRUE if its contents really are Expect.
10255
# Many "exp" files (such as in Apache and Mesa) are just "export" data,
10256
# summarizing something else # (e.g., its interface).
10257
# Sometimes (like in RPM) it's just misc. data.
10258
# Thus, we need to look at the file to determine
10259
# if it's really an "expect" file.
10260
 
10261
 my $filename = shift;
10262
 chomp($filename);
10263
 
10264
# The heuristic is as follows: it's Expect _IF_ it:
10265
# 1. has "load_lib" command and either "#" comments or {}.
10266
# 2. {, }, and one of: proc, if, [...], expect
10267
 
10268
 my $is_expect = 0;      # Value to determine.
10269
 
10270
 my $begin_brace = 0;  # Lines that begin with curly braces.
10271
 my $end_brace = 0;    # Lines that begin with curly braces.
10272
 my $load_lib = 0;     # Lines with the Load_lib command.
10273
 my $found_proc = 0;
10274
 my $found_if = 0;
10275
 my $found_brackets = 0;
10276
 my $found_expect = 0;
10277
 my $found_pound = 0;
10278
 
10279
 # Return cached result, if available:
10280
 if ($expect_files{$filename}) { return expect_files{$filename};}
10281
 
10282
 open(EXPECT_FILE, "<$filename") ||
10283
      die "Can't open $filename to determine if it's expect.\n";
10284
 while(<EXPECT_FILE>) {
10285
 
10286
   if (m/#/) {$found_pound++; s/#.*//;}
10287
   if (m/^\s*\{/) { $begin_brace++;}
10288
   if (m/\{\s*$/) { $begin_brace++;}
10289
   if (m/^\s*\}/) { $end_brace++;}
10290
   if (m/\};?\s*$/) { $end_brace++;}
10291
   if (m/^\s*load_lib\s+\S/) { $load_lib++;}
10292
   if (m/^\s*proc\s/) { $found_proc++;}
10293
   if (m/^\s*if\s/) { $found_if++;}
10294
   if (m/\[.*\]/) { $found_brackets++;}
10295
   if (m/^\s*expect\s/) { $found_expect++;}
10296
 }
10297
 close(EXPECT_FILE);
10298
 
10299
 if ($load_lib && ($found_pound || ($begin_brace && $end_brace)))
10300
          {$is_expect = 1;}
10301
 if ( $begin_brace && $end_brace &&
10302
      ($found_proc || $found_if || $found_brackets || $found_expect))
10303
          {$is_expect = 1;}
10304
 
10305
 $expect_files{$filename} = $is_expect; # Store result in cache.
10306
 
10307
 return $is_expect;
10308
} # 1}}}
10309
sub really_is_pascal {                       # {{{1
10310
# Given filename, returns TRUE if its contents really are Pascal.
10311
 
10312
# This isn't as obvious as it seems.
10313
# Many ".p" files are Perl files
10314
# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p),
10315
# others are C extractions
10316
# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p
10317
# and some files in linuxconf).
10318
# However, test files in "p2c" really are Pascal, for example.
10319
 
10320
# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p
10321
# is actually C code.  The heuristics determine that they're not Pascal,
10322
# but because it ends in ".p" it's not counted as C code either.
10323
# I believe this is actually correct behavior, because frankly it
10324
# looks like it's automatically generated (it's a bitmap expressed as code).
10325
# Rather than guess otherwise, we don't include it in a list of
10326
# source files.  Let's face it, someone who creates C files ending in ".p"
10327
# and expects them to be counted by default as C files in SLOCCount needs
10328
# their head examined.  I suggest examining their head
10329
# with a sucker rod (see syslogd(8) for more on sucker rods).
10330
 
10331
# This heuristic counts as Pascal such files such as:
10332
#  /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p
10333
# Which is hand-generated.  We don't count woven documents now anyway,
10334
# so this is justifiable.
10335
 
10336
 my $filename = shift;
10337
 chomp($filename);
10338
 
10339
# The heuristic is as follows: it's Pascal _IF_ it has all of the following
10340
# (ignoring {...} and (*...*) comments):
10341
# 1. "^..program NAME" or "^..unit NAME",
10342
# 2. "procedure", "function", "^..interface", or "^..implementation",
10343
# 3. a "begin", and
10344
# 4. it ends with "end.",
10345
#
10346
# Or it has all of the following:
10347
# 1. "^..module NAME" and
10348
# 2. it ends with "end.".
10349
#
10350
# Or it has all of the following:
10351
# 1. "^..program NAME",
10352
# 2. a "begin", and
10353
# 3. it ends with "end.".
10354
#
10355
# The "end." requirements in particular filter out non-Pascal.
10356
#
10357
# Note (jgb): this does not detect Pascal main files in fpc, like
10358
# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in
10359
# it
10360
 
10361
 my $is_pascal = 0;      # Value to determine.
10362
 
10363
 my $has_program = 0;
10364
 my $has_unit = 0;
10365
 my $has_module = 0;
10366
 my $has_procedure_or_function = 0;
10367
 my $found_begin = 0;
10368
 my $found_terminating_end = 0;
10369
 my $has_begin = 0;
10370
 
10371
 open(PASCAL_FILE, "<$filename") ||
10372
      die "Can't open $filename to determine if it's pascal.\n";
10373
 while(<PASCAL_FILE>) {
10374
   s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
10375
   s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
10376
   if (m/\bprogram\s+[A-Za-z]/i)  {$has_program=1;}
10377
   if (m/\bunit\s+[A-Za-z]/i)     {$has_unit=1;}
10378
   if (m/\bmodule\s+[A-Za-z]/i)   {$has_module=1;}
10379
   if (m/\bprocedure\b/i)         { $has_procedure_or_function = 1; }
10380
   if (m/\bfunction\b/i)          { $has_procedure_or_function = 1; }
10381
   if (m/^\s*interface\s+/i)      { $has_procedure_or_function = 1; }
10382
   if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; }
10383
   if (m/\bbegin\b/i) { $has_begin = 1; }
10384
   # Originally I said:
10385
   # "This heuristic fails if there are multi-line comments after
10386
   # "end."; I haven't seen that in real Pascal programs:"
10387
   # But jgb found there are a good quantity of them in Debian, specially in 
10388
   # fpc (at the end of a lot of files there is a multiline comment
10389
   # with the changelog for the file).
10390
   # Therefore, assume Pascal if "end." appears anywhere in the file.
10391
   if (m/end\.\s*$/i) {$found_terminating_end = 1;}
10392
#   elsif (m/\S/) {$found_terminating_end = 0;}
10393
 }
10394
 close(PASCAL_FILE);
10395
 
10396
 # Okay, we've examined the entire file looking for clues;
10397
 # let's use those clues to determine if it's really Pascal:
10398
 
10399
 if ( ( ($has_unit || $has_program) && $has_procedure_or_function &&
10400
     $has_begin && $found_terminating_end ) ||
10401
      ( $has_module && $found_terminating_end ) ||
10402
      ( $has_program && $has_begin && $found_terminating_end ) )
10403
          {$is_pascal = 1;}
10404
 
10405
 return $is_pascal;
10406
} # 1}}}
10407
sub really_is_incpascal {                    # {{{1
10408
# Given filename, returns TRUE if its contents really are Pascal.
10409
# For .inc files (mainly seen in fpc)
10410
 
10411
 my $filename = shift;
10412
 chomp($filename);
10413
 
10414
# The heuristic is as follows: it is Pacal if any of the following:
10415
# 1. really_is_pascal returns true
10416
# 2. Any usual reserverd word is found (program, unit, const, begin...)
10417
 
10418
 # If the general routine for Pascal files works, we have it
10419
 if (really_is_pascal($filename)) { 
10420
   return 1;
10421
 }
10422
 
10423
 my $is_pascal = 0;      # Value to determine.
10424
 my $found_begin = 0;
10425
 
10426
 open(PASCAL_FILE, "<$filename") ||
10427
      die "Can't open $filename to determine if it's pascal.\n";
10428
 while(<PASCAL_FILE>) {
10429
   s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
10430
   s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
10431
   if (m/\bprogram\s+[A-Za-z]/i)  {$is_pascal=1;}
10432
   if (m/\bunit\s+[A-Za-z]/i)     {$is_pascal=1;}
10433
   if (m/\bmodule\s+[A-Za-z]/i)   {$is_pascal=1;}
10434
   if (m/\bprocedure\b/i)         {$is_pascal = 1; }
10435
   if (m/\bfunction\b/i)          {$is_pascal = 1; }
10436
   if (m/^\s*interface\s+/i)      {$is_pascal = 1; }
10437
   if (m/^\s*implementation\s+/i) {$is_pascal = 1; }
10438
   if (m/\bconstant\s+/i)         {$is_pascal=1;}
10439
   if (m/\bbegin\b/i) { $found_begin = 1; }
10440
   if ((m/end\.\s*$/i) && ($found_begin = 1)) {$is_pascal = 1;}
10441
   if ($is_pascal) {
10442
     last;
10443
   }
10444
 }
10445
 
10446
 close(PASCAL_FILE);
10447
 return $is_pascal;
10448
} # 1}}}
10449
sub really_is_php {                          # {{{1
10450
# Given filename, returns TRUE if its contents really is php.
10451
 
10452
 my $filename = shift;
10453
 chomp($filename);
10454
 
10455
 my $is_php = 0;      # Value to determine.
10456
 # Need to find a matching pair of surrounds, with ending after beginning:
10457
 my $normal_surround = 0;  # <?; bit 0 = <?, bit 1 = ?>
10458
 my $script_surround = 0;  # <script..>; bit 0 = <script language="php">
10459
 my $asp_surround = 0;     # <%; bit 0 = <%, bit 1 = %>
10460
 
10461
 # Return cached result, if available:
10462
 if ($php_files{$filename}) { return $php_files{$filename};}
10463
 
10464
 open(PHP_FILE, "<$filename") ||
10465
      die "Can't open $filename to determine if it's php.\n";
10466
 while(<PHP_FILE>) {
10467
   if (m/\<\?/)                           { $normal_surround |= 1; }
10468
   if (m/\?\>/ && ($normal_surround & 1)) { $normal_surround |= 2; }
10469
   if (m/\<script.*language="?php"?/i)    { $script_surround |= 1; }
10470
   if (m/\<\/script\>/i && ($script_surround & 1)) { $script_surround |= 2; }
10471
   if (m/\<\%/)                           { $asp_surround |= 1; }
10472
   if (m/\%\>/ && ($asp_surround & 1)) { $asp_surround |= 2; }
10473
 }
10474
 close(PHP_FILE);
10475
 
10476
 if ( ($normal_surround == 3) || ($script_surround == 3) ||
10477
      ($asp_surround == 3)) {
10478
   $is_php = 1;
10479
 }
10480
 
10481
 $php_files{$filename} = $is_php; # Store result in cache.
10482
 
10483
 return $is_php;
10484
} # 1}}}
10485
__END__
10486
mode values (stat $item)[2]
10487
       Unix    Windows
10488
file:  33188   33206
10489
dir :  16832   16895
10490
link:  33261   33206
10491
pipe:   4544    null