| 5282 |
dpurdie |
1 |
#!/usr/bin/env perl
|
|
|
2 |
# cloc -- Count Lines of Code {{{1
|
|
|
3 |
# Copyright (C) 2006-2015 Al Danial <al.danial@gmail.com>
|
|
|
4 |
# First release August 2006
|
|
|
5 |
#
|
|
|
6 |
# Includes code from:
|
|
|
7 |
# - SLOCCount v2.26
|
|
|
8 |
# http://www.dwheeler.com/sloccount/
|
|
|
9 |
# by David Wheeler.
|
|
|
10 |
# - Regexp::Common v2.120
|
|
|
11 |
# http://search.cpan.org/~abigail/Regexp-Common-2.120/lib/Regexp/Common.pm
|
|
|
12 |
# by Damian Conway and Abigail.
|
|
|
13 |
# - Win32::Autoglob
|
|
|
14 |
# http://search.cpan.org/~sburke/Win32-Autoglob-1.01/Autoglob.pm
|
|
|
15 |
# by Sean M. Burke.
|
|
|
16 |
# - Algorithm::Diff
|
|
|
17 |
# http://search.cpan.org/~tyemq/Algorithm-Diff-1.1902/lib/Algorithm/Diff.pm
|
|
|
18 |
# by Tye McQueen.
|
|
|
19 |
#
|
|
|
20 |
# This program is free software; you can redistribute it and/or modify
|
|
|
21 |
# it under the terms of the GNU General Public License as published by
|
|
|
22 |
# the Free Software Foundation; either version 2 of the License, or
|
|
|
23 |
# (at your option) any later version.
|
|
|
24 |
#
|
|
|
25 |
# This program is distributed in the hope that it will be useful,
|
|
|
26 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
27 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
28 |
# GNU General Public License for more details:
|
|
|
29 |
# <http://www.gnu.org/licenses/gpl.txt>.
|
|
|
30 |
#
|
|
|
31 |
# 1}}}
|
|
|
32 |
my $VERSION = "1.65"; # odd number == beta; even number == stable
|
|
|
33 |
my $URL = "https://github.com/AlDanial/cloc";
|
|
|
34 |
require 5.006;
|
|
|
35 |
# use modules {{{1
|
|
|
36 |
use warnings;
|
|
|
37 |
use strict;
|
|
|
38 |
use Getopt::Long;
|
|
|
39 |
use File::Basename;
|
|
|
40 |
use File::Temp qw { tempfile tempdir };
|
|
|
41 |
use File::Find;
|
|
|
42 |
use File::Path;
|
|
|
43 |
use File::Spec;
|
|
|
44 |
use IO::File;
|
|
|
45 |
use POSIX "strftime";
|
|
|
46 |
|
|
|
47 |
# Digest::MD5 isn't in the standard distribution. Use it only if installed.
|
|
|
48 |
my $HAVE_Digest_MD5 = 0;
|
|
|
49 |
eval "use Digest::MD5;";
|
|
|
50 |
if (defined $Digest::MD5::VERSION) {
|
|
|
51 |
$HAVE_Digest_MD5 = 1;
|
|
|
52 |
} else {
|
|
|
53 |
warn "Digest::MD5 not installed; will skip file uniqueness checks.\n";
|
|
|
54 |
}
|
|
|
55 |
|
|
|
56 |
# Time::HiRes became standard with Perl 5.8
|
|
|
57 |
my $HAVE_Time_HiRes = 0;
|
|
|
58 |
eval "use Time::HiRes;";
|
|
|
59 |
$HAVE_Time_HiRes = 1 if defined $Time::HiRes::VERSION;
|
|
|
60 |
|
|
|
61 |
my $HAVE_Rexexp_Common;
|
|
|
62 |
# Regexp::Common isn't in the standard distribution. It will
|
|
|
63 |
# be installed in a temp directory if necessary.
|
|
|
64 |
BEGIN {
|
|
|
65 |
if (eval "use Regexp::Common;") {
|
|
|
66 |
$HAVE_Rexexp_Common = 1;
|
|
|
67 |
} else {
|
|
|
68 |
$HAVE_Rexexp_Common = 0;
|
|
|
69 |
}
|
|
|
70 |
}
|
|
|
71 |
|
|
|
72 |
my $HAVE_Algorith_Diff = 0;
|
|
|
73 |
# Algorithm::Diff isn't in the standard distribution. It will
|
|
|
74 |
# be installed in a temp directory if necessary.
|
|
|
75 |
eval "use Algorithm::Diff qw ( sdiff ) ";
|
|
|
76 |
if (defined $Algorithm::Diff::VERSION) {
|
|
|
77 |
$HAVE_Algorith_Diff = 1;
|
|
|
78 |
} else {
|
|
|
79 |
Install_Algorithm_Diff();
|
|
|
80 |
}
|
|
|
81 |
# print "2 HAVE_Algorith_Diff = $HAVE_Algorith_Diff\n";
|
|
|
82 |
# test_alg_diff($ARGV[$#ARGV - 1], $ARGV[$#ARGV]); die;
|
|
|
83 |
|
|
|
84 |
# Uncomment next two lines when building Windows executable with perl2exe
|
|
|
85 |
# or if running on a system that already has Regexp::Common.
|
|
|
86 |
#use Regexp::Common;
|
|
|
87 |
#$HAVE_Rexexp_Common = 1;
|
|
|
88 |
|
|
|
89 |
#perl2exe_include "Regexp/Common/whitespace.pm"
|
|
|
90 |
#perl2exe_include "Regexp/Common/URI.pm"
|
|
|
91 |
#perl2exe_include "Regexp/Common/URI/fax.pm"
|
|
|
92 |
#perl2exe_include "Regexp/Common/URI/file.pm"
|
|
|
93 |
#perl2exe_include "Regexp/Common/URI/ftp.pm"
|
|
|
94 |
#perl2exe_include "Regexp/Common/URI/gopher.pm"
|
|
|
95 |
#perl2exe_include "Regexp/Common/URI/http.pm"
|
|
|
96 |
#perl2exe_include "Regexp/Common/URI/pop.pm"
|
|
|
97 |
#perl2exe_include "Regexp/Common/URI/prospero.pm"
|
|
|
98 |
#perl2exe_include "Regexp/Common/URI/news.pm"
|
|
|
99 |
#perl2exe_include "Regexp/Common/URI/tel.pm"
|
|
|
100 |
#perl2exe_include "Regexp/Common/URI/telnet.pm"
|
|
|
101 |
#perl2exe_include "Regexp/Common/URI/tv.pm"
|
|
|
102 |
#perl2exe_include "Regexp/Common/URI/wais.pm"
|
|
|
103 |
#perl2exe_include "Regexp/Common/CC.pm"
|
|
|
104 |
#perl2exe_include "Regexp/Common/SEN.pm"
|
|
|
105 |
#perl2exe_include "Regexp/Common/number.pm"
|
|
|
106 |
#perl2exe_include "Regexp/Common/delimited.pm"
|
|
|
107 |
#perl2exe_include "Regexp/Common/profanity.pm"
|
|
|
108 |
#perl2exe_include "Regexp/Common/net.pm"
|
|
|
109 |
#perl2exe_include "Regexp/Common/zip.pm"
|
|
|
110 |
#perl2exe_include "Regexp/Common/comment.pm"
|
|
|
111 |
#perl2exe_include "Regexp/Common/balanced.pm"
|
|
|
112 |
#perl2exe_include "Regexp/Common/lingua.pm"
|
|
|
113 |
#perl2exe_include "Regexp/Common/list.pm"
|
|
|
114 |
#perl2exe_include "File/Glob.pm"
|
|
|
115 |
|
|
|
116 |
use Text::Tabs qw { expand };
|
|
|
117 |
use Cwd qw { cwd };
|
|
|
118 |
use File::Glob;
|
|
|
119 |
# 1}}}
|
|
|
120 |
# Usage information, options processing. {{{1
|
|
|
121 |
my $ON_WINDOWS = 0;
|
|
|
122 |
$ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT");
|
|
|
123 |
if ($ON_WINDOWS and $ENV{'SHELL'}) {
|
|
|
124 |
if ($ENV{'SHELL'} =~ m{^/}) {
|
|
|
125 |
$ON_WINDOWS = 0; # make Cygwin look like Unix
|
|
|
126 |
} else {
|
|
|
127 |
$ON_WINDOWS = 1; # MKS defines $SHELL but still acts like Windows
|
|
|
128 |
}
|
|
|
129 |
}
|
|
|
130 |
|
|
|
131 |
my $NN = chr(27) . "[0m"; # normal
|
|
|
132 |
$NN = "" if $ON_WINDOWS or !(-t STDERR); # -t STDERR: is it a terminal?
|
|
|
133 |
my $BB = chr(27) . "[1m"; # bold
|
|
|
134 |
$BB = "" if $ON_WINDOWS or !(-t STDERR);
|
|
|
135 |
my $script = basename $0;
|
|
|
136 |
my $usage = "
|
|
|
137 |
Usage: $script [options] <file(s)/dir(s)> | <set 1> <set 2> | <report files>
|
|
|
138 |
|
|
|
139 |
Count, or compute differences of, physical lines of source code in the
|
|
|
140 |
given files (may be archives such as compressed tarballs or zip files)
|
|
|
141 |
and/or recursively below the given directories.
|
|
|
142 |
|
|
|
143 |
${BB}Input Options${NN}
|
|
|
144 |
--extract-with=<cmd> This option is only needed if cloc is unable
|
|
|
145 |
to figure out how to extract the contents of
|
|
|
146 |
the input file(s) by itself.
|
|
|
147 |
Use <cmd> to extract binary archive files (e.g.:
|
|
|
148 |
.tar.gz, .zip, .Z). Use the literal '>FILE<' as
|
|
|
149 |
a stand-in for the actual file(s) to be
|
|
|
150 |
extracted. For example, to count lines of code
|
|
|
151 |
in the input files
|
|
|
152 |
gcc-4.2.tar.gz perl-5.8.8.tar.gz
|
|
|
153 |
on Unix use
|
|
|
154 |
--extract-with='gzip -dc >FILE< | tar xf -'
|
|
|
155 |
or, if you have GNU tar,
|
|
|
156 |
--extract-with='tar zxf >FILE<'
|
|
|
157 |
and on Windows use, for example:
|
|
|
158 |
--extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\"
|
|
|
159 |
(if WinZip is installed there).
|
|
|
160 |
--list-file=<file> Take the list of file and/or directory names to
|
|
|
161 |
process from <file>, which has one file/directory
|
|
|
162 |
name per line. Only exact matches are counted;
|
|
|
163 |
relative path names will be resolved starting from
|
|
|
164 |
the directory where cloc is invoked.
|
|
|
165 |
See also --exclude-list-file.
|
|
|
166 |
--unicode Check binary files to see if they contain Unicode
|
|
|
167 |
expanded ASCII text. This causes performance to
|
|
|
168 |
drop noticably.
|
|
|
169 |
|
|
|
170 |
${BB}Processing Options${NN}
|
|
|
171 |
--autoconf Count .in files (as processed by GNU autoconf) of
|
|
|
172 |
recognized languages.
|
|
|
173 |
--by-file Report results for every source file encountered.
|
|
|
174 |
--by-file-by-lang Report results for every source file encountered
|
|
|
175 |
in addition to reporting by language.
|
|
|
176 |
--count-and-diff <set1> <set2>
|
|
|
177 |
First perform direct code counts of source file(s)
|
|
|
178 |
of <set1> and <set2> separately, then perform a diff
|
|
|
179 |
of these. Inputs may be pairs of files, directories,
|
|
|
180 |
or archives. See also --diff, --diff-alignment,
|
|
|
181 |
--diff-timeout, --ignore-case, --ignore-whitespace.
|
|
|
182 |
--diff <set1> <set2> Compute differences in code and comments between
|
|
|
183 |
source file(s) of <set1> and <set2>. The inputs
|
|
|
184 |
may be pairs of files, directories, or archives.
|
|
|
185 |
Use --diff-alignment to generate a list showing
|
|
|
186 |
which file pairs where compared. See also
|
|
|
187 |
--count-and-diff, --diff-alignment, --diff-timeout,
|
|
|
188 |
--ignore-case, --ignore-whitespace.
|
|
|
189 |
--diff-timeout <N> Ignore files which take more than <N> seconds
|
|
|
190 |
to process. Default is 10 seconds.
|
|
|
191 |
(Large files with many repeated lines can cause
|
|
|
192 |
Algorithm::Diff::sdiff() to take hours.)
|
|
|
193 |
--follow-links [Unix only] Follow symbolic links to directories
|
|
|
194 |
(sym links to files are always followed).
|
|
|
195 |
--force-lang=<lang>[,<ext>]
|
|
|
196 |
Process all files that have a <ext> extension
|
|
|
197 |
with the counter for language <lang>. For
|
|
|
198 |
example, to count all .f files with the
|
|
|
199 |
Fortran 90 counter (which expects files to
|
|
|
200 |
end with .f90) instead of the default Fortran 77
|
|
|
201 |
counter, use
|
|
|
202 |
--force-lang=\"Fortran 90\",f
|
|
|
203 |
If <ext> is omitted, every file will be counted
|
|
|
204 |
with the <lang> counter. This option can be
|
|
|
205 |
specified multiple times (but that is only
|
|
|
206 |
useful when <ext> is given each time).
|
|
|
207 |
See also --script-lang, --lang-no-ext.
|
|
|
208 |
--force-lang-def=<file> Load language processing filters from <file>,
|
|
|
209 |
then use these filters instead of the built-in
|
|
|
210 |
filters. Note: languages which map to the same
|
|
|
211 |
file extension (for example:
|
|
|
212 |
MATLAB/Objective C/MUMPS/Mercury; Pascal/PHP;
|
|
|
213 |
Lisp/OpenCL; Lisp/Julia; Perl/Prolog) will be
|
|
|
214 |
ignored as these require additional processing
|
|
|
215 |
that is not expressed in language definition
|
|
|
216 |
files. Use --read-lang-def to define new
|
|
|
217 |
language filters without replacing built-in
|
|
|
218 |
filters (see also --write-lang-def).
|
|
|
219 |
--ignore-whitespace Ignore horizontal white space when comparing files
|
|
|
220 |
with --diff. See also --ignore-case.
|
|
|
221 |
--ignore-case Ignore changes in case; consider upper- and lower-
|
|
|
222 |
case letters equivalent when comparing files with
|
|
|
223 |
--diff. See also --ignore-whitespace.
|
|
|
224 |
--lang-no-ext=<lang> Count files without extensions using the <lang>
|
|
|
225 |
counter. This option overrides internal logic
|
|
|
226 |
for files without extensions (where such files
|
|
|
227 |
are checked against known scripting languages
|
|
|
228 |
by examining the first line for #!). See also
|
|
|
229 |
--force-lang, --script-lang.
|
|
|
230 |
--max-file-size=<MB> Skip files larger than <MB> megabytes when
|
|
|
231 |
traversing directories. By default, <MB>=100.
|
|
|
232 |
cloc's memory requirement is roughly twenty times
|
|
|
233 |
larger than the largest file so running with
|
|
|
234 |
files larger than 100 MB on a computer with less
|
|
|
235 |
than 2 GB of memory will cause problems.
|
|
|
236 |
Note: this check does not apply to files
|
|
|
237 |
explicitly passed as command line arguments.
|
|
|
238 |
--read-binary-files Process binary files in addition to text files.
|
|
|
239 |
This is usually a bad idea and should only be
|
|
|
240 |
attempted with text files that have embedded
|
|
|
241 |
binary data.
|
|
|
242 |
--read-lang-def=<file> Load new language processing filters from <file>
|
|
|
243 |
and merge them with those already known to cloc.
|
|
|
244 |
If <file> defines a language cloc already knows
|
|
|
245 |
about, cloc's definition will take precedence.
|
|
|
246 |
Use --force-lang-def to over-ride cloc's
|
|
|
247 |
definitions (see also --write-lang-def ).
|
|
|
248 |
--script-lang=<lang>,<s> Process all files that invoke <s> as a #!
|
|
|
249 |
scripting language with the counter for language
|
|
|
250 |
<lang>. For example, files that begin with
|
|
|
251 |
#!/usr/local/bin/perl5.8.8
|
|
|
252 |
will be counted with the Perl counter by using
|
|
|
253 |
--script-lang=Perl,perl5.8.8
|
|
|
254 |
The language name is case insensitive but the
|
|
|
255 |
name of the script language executable, <s>,
|
|
|
256 |
must have the right case. This option can be
|
|
|
257 |
specified multiple times. See also --force-lang,
|
|
|
258 |
--lang-no-ext.
|
|
|
259 |
--sdir=<dir> Use <dir> as the scratch directory instead of
|
|
|
260 |
letting File::Temp chose the location. Files
|
|
|
261 |
written to this location are not removed at
|
|
|
262 |
the end of the run (as they are with File::Temp).
|
|
|
263 |
--skip-uniqueness Skip the file uniqueness check. This will give
|
|
|
264 |
a performance boost at the expense of counting
|
|
|
265 |
files with identical contents multiple times
|
|
|
266 |
(if such duplicates exist).
|
|
|
267 |
--stdin-name=<file> Give a file name to use to determine the language
|
|
|
268 |
for standard input.
|
|
|
269 |
--strip-comments=<ext> For each file processed, write to the current
|
|
|
270 |
directory a version of the file which has blank
|
|
|
271 |
lines and comments removed. The name of each
|
|
|
272 |
stripped file is the original file name with
|
|
|
273 |
.<ext> appended to it. It is written to the
|
|
|
274 |
current directory unless --original-dir is on.
|
|
|
275 |
--original-dir [Only effective in combination with
|
|
|
276 |
--strip-comments] Write the stripped files
|
|
|
277 |
to the same directory as the original files.
|
|
|
278 |
--sum-reports Input arguments are report files previously
|
|
|
279 |
created with the --report-file option. Makes
|
|
|
280 |
a cumulative set of results containing the
|
|
|
281 |
sum of data from the individual report files.
|
|
|
282 |
--unix Override the operating system autodetection
|
|
|
283 |
logic and run in UNIX mode. See also
|
|
|
284 |
--windows, --show-os.
|
|
|
285 |
--windows Override the operating system autodetection
|
|
|
286 |
logic and run in Microsoft Windows mode.
|
|
|
287 |
See also --unix, --show-os.
|
|
|
288 |
|
|
|
289 |
${BB}Filter Options${NN}
|
|
|
290 |
--exclude-dir=<D1>[,D2,] Exclude the given comma separated directories
|
|
|
291 |
D1, D2, D3, et cetera, from being scanned. For
|
|
|
292 |
example --exclude-dir=.cache,test will skip
|
|
|
293 |
all files that have /.cache/ or /test/ as part
|
|
|
294 |
of their path.
|
|
|
295 |
Directories named .bzr, .cvs, .hg, .git, and
|
|
|
296 |
.svn are always excluded.
|
|
|
297 |
--exclude-ext=<ext1>[,<ext2>[...]]
|
|
|
298 |
Do not count files having the given file name
|
|
|
299 |
extensions.
|
|
|
300 |
--exclude-lang=<L1>[,L2,] Exclude the given comma separated languages
|
|
|
301 |
L1, L2, L3, et cetera, from being counted.
|
|
|
302 |
--exclude-list-file=<file> Ignore files and/or directories whose names
|
|
|
303 |
appear in <file>. <file> should have one file
|
|
|
304 |
name per line. Only exact matches are ignored;
|
|
|
305 |
relative path names will be resolved starting from
|
|
|
306 |
the directory where cloc is invoked.
|
|
|
307 |
See also --list-file.
|
|
|
308 |
--include-lang=<L1>[,L2,] Count only the given comma separated languages
|
|
|
309 |
L1, L2, L3, et cetera.
|
|
|
310 |
--match-d=<regex> Only count files in directories matching the Perl
|
|
|
311 |
regex. For example
|
|
|
312 |
--match-d='/(src|include)/'
|
|
|
313 |
only counts files in directories containing
|
|
|
314 |
/src/ or /include/.
|
|
|
315 |
--not-match-d=<regex> Count all files except those in directories
|
|
|
316 |
matching the Perl regex.
|
|
|
317 |
--match-f=<regex> Only count files whose basenames match the Perl
|
|
|
318 |
regex. For example
|
|
|
319 |
--match-f='^[Ww]idget'
|
|
|
320 |
only counts files that start with Widget or widget.
|
|
|
321 |
--not-match-f=<regex> Count all files except those whose basenames
|
|
|
322 |
match the Perl regex.
|
|
|
323 |
--skip-archive=<regex> Ignore files that end with the given Perl regular
|
|
|
324 |
expression. For example, if given
|
|
|
325 |
--skip-archive='(zip|tar(\.(gz|Z|bz2|xz|7z))?)'
|
|
|
326 |
the code will skip files that end with .zip,
|
|
|
327 |
.tar, .tar.gz, .tar.Z, .tar.bz2, .tar.xz, and
|
|
|
328 |
.tar.7z.
|
|
|
329 |
--skip-win-hidden On Windows, ignore hidden files.
|
|
|
330 |
|
|
|
331 |
${BB}Debug Options${NN}
|
|
|
332 |
--categorized=<file> Save names of categorized files to <file>.
|
|
|
333 |
--counted=<file> Save names of processed source files to <file>.
|
|
|
334 |
--explain=<lang> Print the filters used to remove comments for
|
|
|
335 |
language <lang> and exit. In some cases the
|
|
|
336 |
filters refer to Perl subroutines rather than
|
|
|
337 |
regular expressions. An examination of the
|
|
|
338 |
source code may be needed for further explanation.
|
|
|
339 |
--diff-alignment=<file> Write to <file> a list of files and file pairs
|
|
|
340 |
showing which files were added, removed, and/or
|
|
|
341 |
compared during a run with --diff. This switch
|
|
|
342 |
forces the --diff mode on.
|
|
|
343 |
--help Print this usage information and exit.
|
|
|
344 |
--found=<file> Save names of every file found to <file>.
|
|
|
345 |
--ignored=<file> Save names of ignored files and the reason they
|
|
|
346 |
were ignored to <file>.
|
|
|
347 |
--print-filter-stages Print processed source code before and after
|
|
|
348 |
each filter is applied.
|
|
|
349 |
--show-ext[=<ext>] Print information about all known (or just the
|
|
|
350 |
given) file extensions and exit.
|
|
|
351 |
--show-lang[=<lang>] Print information about all known (or just the
|
|
|
352 |
given) languages and exit.
|
|
|
353 |
--show-os Print the value of the operating system mode
|
|
|
354 |
and exit. See also --unix, --windows.
|
|
|
355 |
-v[=<n>] Verbose switch (optional numeric value).
|
|
|
356 |
--version Print the version of this program and exit.
|
|
|
357 |
--write-lang-def=<file> Writes to <file> the language processing filters
|
|
|
358 |
then exits. Useful as a first step to creating
|
|
|
359 |
custom language definitions (see also
|
|
|
360 |
--force-lang-def, --read-lang-def).
|
|
|
361 |
|
|
|
362 |
${BB}Output Options${NN}
|
|
|
363 |
--3 Print third-generation language output.
|
|
|
364 |
(This option can cause report summation to fail
|
|
|
365 |
if some reports were produced with this option
|
|
|
366 |
while others were produced without it.)
|
|
|
367 |
--by-percent X Instead of comment and blank line counts, show
|
|
|
368 |
these values as percentages based on the value
|
|
|
369 |
of X in the denominator:
|
|
|
370 |
X = 'c' -> # lines of code
|
|
|
371 |
X = 'cm' -> # lines of code + comments
|
|
|
372 |
X = 'cb' -> # lines of code + blanks
|
|
|
373 |
X = 'cmb' -> # lines of code + comments + blanks
|
|
|
374 |
For example, if using method 'c' and your code
|
|
|
375 |
has twice as many lines of comments as lines
|
|
|
376 |
of code, the value in the comment column will
|
|
|
377 |
be 200%. The code column remains a line count.
|
|
|
378 |
--csv Write the results as comma separated values.
|
|
|
379 |
--csv-delimiter=<C> Use the character <C> as the delimiter for comma
|
|
|
380 |
separated files instead of ,. This switch forces
|
|
|
381 |
--out=<file> Synonym for --report-file=<file>.
|
|
|
382 |
--csv to be on.
|
|
|
383 |
--progress-rate=<n> Show progress update after every <n> files are
|
|
|
384 |
processed (default <n>=100). Set <n> to 0 to
|
|
|
385 |
suppress progress output (useful when redirecting
|
|
|
386 |
output to STDOUT).
|
|
|
387 |
--quiet Suppress all information messages except for
|
|
|
388 |
the final report.
|
|
|
389 |
--report-file=<file> Write the results to <file> instead of STDOUT.
|
|
|
390 |
--sql=<file> Write results as SQL create and insert statements
|
|
|
391 |
which can be read by a database program such as
|
|
|
392 |
SQLite. If <file> is -, output is sent to STDOUT.
|
|
|
393 |
--sql-append Append SQL insert statements to the file specified
|
|
|
394 |
by --sql and do not generate table creation
|
|
|
395 |
statements. Only valid with the --sql option.
|
|
|
396 |
--sql-project=<name> Use <name> as the project identifier for the
|
|
|
397 |
current run. Only valid with the --sql option.
|
|
|
398 |
--sql-style=<style> Write SQL statements in the given style instead
|
|
|
399 |
of the default SQLite format. Currently, the
|
|
|
400 |
only style option is Oracle.
|
|
|
401 |
--sum-one For plain text reports, show the SUM: output line
|
|
|
402 |
even if only one input file is processed.
|
|
|
403 |
--xml Write the results in XML.
|
|
|
404 |
--xsl=<file> Reference <file> as an XSL stylesheet within
|
|
|
405 |
the XML output. If <file> is 1 (numeric one),
|
|
|
406 |
writes a default stylesheet, cloc.xsl (or
|
|
|
407 |
cloc-diff.xsl if --diff is also given).
|
|
|
408 |
This switch forces --xml on.
|
|
|
409 |
--yaml Write the results in YAML.
|
|
|
410 |
|
|
|
411 |
";
|
|
|
412 |
# Help information for options not yet implemented:
|
|
|
413 |
# --inline Process comments that appear at the end
|
|
|
414 |
# of lines containing code.
|
|
|
415 |
# --html Create HTML files of each input file showing
|
|
|
416 |
# comment and code lines in different colors.
|
|
|
417 |
|
|
|
418 |
$| = 1; # flush STDOUT
|
|
|
419 |
my $start_time = get_time();
|
|
|
420 |
my (
|
|
|
421 |
$opt_categorized ,
|
|
|
422 |
$opt_found ,
|
|
|
423 |
@opt_force_lang ,
|
|
|
424 |
$opt_lang_no_ext ,
|
|
|
425 |
@opt_script_lang ,
|
|
|
426 |
$opt_count_diff ,
|
|
|
427 |
$opt_diff ,
|
|
|
428 |
$opt_diff_alignment ,
|
|
|
429 |
$opt_diff_timeout ,
|
|
|
430 |
$opt_html ,
|
|
|
431 |
$opt_ignored ,
|
|
|
432 |
$opt_counted ,
|
|
|
433 |
$opt_show_ext ,
|
|
|
434 |
$opt_show_lang ,
|
|
|
435 |
$opt_progress_rate ,
|
|
|
436 |
$opt_print_filter_stages ,
|
|
|
437 |
$opt_v ,
|
|
|
438 |
$opt_version ,
|
|
|
439 |
$opt_exclude_lang ,
|
|
|
440 |
$opt_exclude_list_file ,
|
|
|
441 |
$opt_exclude_dir ,
|
|
|
442 |
$opt_explain ,
|
|
|
443 |
$opt_include_lang ,
|
|
|
444 |
$opt_force_lang_def ,
|
|
|
445 |
$opt_read_lang_def ,
|
|
|
446 |
$opt_write_lang_def ,
|
|
|
447 |
$opt_strip_comments ,
|
|
|
448 |
$opt_original_dir ,
|
|
|
449 |
$opt_quiet ,
|
|
|
450 |
$opt_report_file ,
|
|
|
451 |
$opt_sdir ,
|
|
|
452 |
$opt_sum_reports ,
|
|
|
453 |
$opt_unicode ,
|
|
|
454 |
$opt_no3 , # accept it but don't use it
|
|
|
455 |
$opt_3 ,
|
|
|
456 |
$opt_extract_with ,
|
|
|
457 |
$opt_by_file ,
|
|
|
458 |
$opt_by_file_by_lang ,
|
|
|
459 |
$opt_by_percent ,
|
|
|
460 |
$opt_xml ,
|
|
|
461 |
$opt_xsl ,
|
|
|
462 |
$opt_yaml ,
|
|
|
463 |
$opt_csv ,
|
|
|
464 |
$opt_csv_delimiter ,
|
|
|
465 |
$opt_match_f ,
|
|
|
466 |
$opt_not_match_f ,
|
|
|
467 |
$opt_match_d ,
|
|
|
468 |
$opt_not_match_d ,
|
|
|
469 |
$opt_skip_uniqueness ,
|
|
|
470 |
$opt_list_file ,
|
|
|
471 |
$opt_help ,
|
|
|
472 |
$opt_skip_win_hidden ,
|
|
|
473 |
$opt_read_binary_files ,
|
|
|
474 |
$opt_sql ,
|
|
|
475 |
$opt_sql_append ,
|
|
|
476 |
$opt_sql_project ,
|
|
|
477 |
$opt_sql_style ,
|
|
|
478 |
$opt_inline ,
|
|
|
479 |
$opt_exclude_ext ,
|
|
|
480 |
$opt_ignore_whitespace ,
|
|
|
481 |
$opt_ignore_case ,
|
|
|
482 |
$opt_follow_links ,
|
|
|
483 |
$opt_autoconf ,
|
|
|
484 |
$opt_sum_one ,
|
|
|
485 |
$opt_stdin_name ,
|
|
|
486 |
$opt_force_on_windows ,
|
|
|
487 |
$opt_force_on_unix , # actually forces !$ON_WINDOWS
|
|
|
488 |
$opt_show_os ,
|
|
|
489 |
$opt_skip_archive ,
|
|
|
490 |
$opt_max_file_size , # in MB
|
|
|
491 |
);
|
|
|
492 |
my $getopt_success = GetOptions(
|
|
|
493 |
"by_file|by-file" => \$opt_by_file ,
|
|
|
494 |
"by_file_by_lang|by-file-by-lang" => \$opt_by_file_by_lang ,
|
|
|
495 |
"categorized=s" => \$opt_categorized ,
|
|
|
496 |
"counted=s" => \$opt_counted ,
|
|
|
497 |
"include_lang|include-lang=s" => \$opt_include_lang ,
|
|
|
498 |
"exclude_lang|exclude-lang=s" => \$opt_exclude_lang ,
|
|
|
499 |
"exclude_dir|exclude-dir=s" => \$opt_exclude_dir ,
|
|
|
500 |
"exclude_list_file|exclude-list-file=s" => \$opt_exclude_list_file ,
|
|
|
501 |
"explain=s" => \$opt_explain ,
|
|
|
502 |
"extract_with|extract-with=s" => \$opt_extract_with ,
|
|
|
503 |
"found=s" => \$opt_found ,
|
|
|
504 |
"count_and_diff|count-and-diff" => \$opt_count_diff ,
|
|
|
505 |
"diff" => \$opt_diff ,
|
|
|
506 |
"diff-alignment|diff_alignment=s" => \$opt_diff_alignment ,
|
|
|
507 |
"diff-timeout|diff_timeout=i" => \$opt_diff_timeout ,
|
|
|
508 |
"html" => \$opt_html ,
|
|
|
509 |
"ignored=s" => \$opt_ignored ,
|
|
|
510 |
"quiet" => \$opt_quiet ,
|
|
|
511 |
"force_lang_def|force-lang-def=s" => \$opt_force_lang_def ,
|
|
|
512 |
"read_lang_def|read-lang-def=s" => \$opt_read_lang_def ,
|
|
|
513 |
"show_ext|show-ext:s" => \$opt_show_ext ,
|
|
|
514 |
"show_lang|show-lang:s" => \$opt_show_lang ,
|
|
|
515 |
"progress_rate|progress-rate=i" => \$opt_progress_rate ,
|
|
|
516 |
"print_filter_stages|print-filter-stages" => \$opt_print_filter_stages ,
|
|
|
517 |
"report_file|report-file=s" => \$opt_report_file ,
|
|
|
518 |
"out=s" => \$opt_report_file ,
|
|
|
519 |
"script_lang|script-lang=s" => \@opt_script_lang ,
|
|
|
520 |
"sdir=s" => \$opt_sdir ,
|
|
|
521 |
"skip_uniqueness|skip-uniqueness" => \$opt_skip_uniqueness ,
|
|
|
522 |
"strip_comments|strip-comments=s" => \$opt_strip_comments ,
|
|
|
523 |
"original_dir|original-dir" => \$opt_original_dir ,
|
|
|
524 |
"sum_reports|sum-reports" => \$opt_sum_reports ,
|
|
|
525 |
"unicode" => \$opt_unicode ,
|
|
|
526 |
"no3" => \$opt_no3 , # ignored
|
|
|
527 |
"3" => \$opt_3 ,
|
|
|
528 |
"v:i" => \$opt_v ,
|
|
|
529 |
"version" => \$opt_version ,
|
|
|
530 |
"write_lang_def|write-lang-def=s" => \$opt_write_lang_def ,
|
|
|
531 |
"xml" => \$opt_xml ,
|
|
|
532 |
"xsl=s" => \$opt_xsl ,
|
|
|
533 |
"force_lang|force-lang=s" => \@opt_force_lang ,
|
|
|
534 |
"lang_no_ext|lang-no-ext=s" => \$opt_lang_no_ext ,
|
|
|
535 |
"yaml" => \$opt_yaml ,
|
|
|
536 |
"csv" => \$opt_csv ,
|
|
|
537 |
"csv_delimeter|csv-delimiter=s" => \$opt_csv_delimiter ,
|
|
|
538 |
"match_f|match-f=s" => \$opt_match_f ,
|
|
|
539 |
"not_match_f|not-match-f=s" => \$opt_not_match_f ,
|
|
|
540 |
"match_d|match-d=s" => \$opt_match_d ,
|
|
|
541 |
"not_match_d|not-match-d=s" => \$opt_not_match_d ,
|
|
|
542 |
"list_file|list-file=s" => \$opt_list_file ,
|
|
|
543 |
"help" => \$opt_help ,
|
|
|
544 |
"skip_win_hidden|skip-win-hidden" => \$opt_skip_win_hidden ,
|
|
|
545 |
"read_binary_files|read-binary-files" => \$opt_read_binary_files ,
|
|
|
546 |
"sql=s" => \$opt_sql ,
|
|
|
547 |
"sql_project|sql-project=s" => \$opt_sql_project ,
|
|
|
548 |
"sql_append|sql-append" => \$opt_sql_append ,
|
|
|
549 |
"sql_style|sql-style=s" => \$opt_sql_style ,
|
|
|
550 |
"inline" => \$opt_inline ,
|
|
|
551 |
"exclude_ext|exclude-ext=s" => \$opt_exclude_ext ,
|
|
|
552 |
"ignore_whitespace|ignore-whitespace" => \$opt_ignore_whitespace ,
|
|
|
553 |
"ignore_case|ignore-case" => \$opt_ignore_case ,
|
|
|
554 |
"follow_links|follow-links" => \$opt_follow_links ,
|
|
|
555 |
"autoconf" => \$opt_autoconf ,
|
|
|
556 |
"sum_one|sum-one" => \$opt_sum_one ,
|
|
|
557 |
"by_percent|by-percent=s" => \$opt_by_percent ,
|
|
|
558 |
"stdin_name|stdin-name=s" => \$opt_stdin_name ,
|
|
|
559 |
"windows" => \$opt_force_on_windows ,
|
|
|
560 |
"unix" => \$opt_force_on_unix ,
|
|
|
561 |
"show_os|show-os" => \$opt_show_os ,
|
|
|
562 |
"skip_archive|skip-archive=s" => \$opt_skip_archive ,
|
|
|
563 |
"max_file_size|max-file-size=i" => \$opt_max_file_size ,
|
|
|
564 |
);
|
|
|
565 |
$opt_by_file = 1 if defined $opt_by_file_by_lang;
|
|
|
566 |
my $CLOC_XSL = "cloc.xsl"; # created with --xsl
|
|
|
567 |
$CLOC_XSL = "cloc-diff.xsl" if $opt_diff;
|
|
|
568 |
die "\n" unless $getopt_success;
|
|
|
569 |
die $usage if $opt_help;
|
|
|
570 |
my %Exclude_Language = ();
|
|
|
571 |
%Exclude_Language = map { $_ => 1 } split(/,/, $opt_exclude_lang)
|
|
|
572 |
if $opt_exclude_lang;
|
|
|
573 |
my %Exclude_Dir = ();
|
|
|
574 |
%Exclude_Dir = map { $_ => 1 } split(/,/, $opt_exclude_dir )
|
|
|
575 |
if $opt_exclude_dir ;
|
|
|
576 |
my %Include_Language = ();
|
|
|
577 |
%Include_Language = map { $_ => 1 } split(/,/, $opt_include_lang)
|
|
|
578 |
if $opt_include_lang;
|
|
|
579 |
# Forcibly exclude .svn, .cvs, .hg, .git, .bzr directories. The contents of these
|
|
|
580 |
# directories often conflict with files of interest.
|
|
|
581 |
$opt_exclude_dir = 1;
|
|
|
582 |
$Exclude_Dir{".svn"} = 1;
|
|
|
583 |
$Exclude_Dir{".cvs"} = 1;
|
|
|
584 |
$Exclude_Dir{".hg"} = 1;
|
|
|
585 |
$Exclude_Dir{".git"} = 1;
|
|
|
586 |
$Exclude_Dir{".bzr"} = 1;
|
|
|
587 |
$opt_count_diff = defined $opt_count_diff ? 1 : 0;
|
|
|
588 |
$opt_diff = 1 if $opt_diff_alignment;
|
|
|
589 |
$opt_exclude_ext = "" unless $opt_exclude_ext;
|
|
|
590 |
$opt_ignore_whitespace = 0 unless $opt_ignore_whitespace;
|
|
|
591 |
$opt_ignore_case = 0 unless $opt_ignore_case;
|
|
|
592 |
$opt_lang_no_ext = 0 unless $opt_lang_no_ext;
|
|
|
593 |
$opt_follow_links = 0 unless $opt_follow_links;
|
|
|
594 |
$opt_diff_timeout =10 unless $opt_diff_timeout;
|
|
|
595 |
$opt_csv = 1 if $opt_csv_delimiter;
|
|
|
596 |
$ON_WINDOWS = 1 if $opt_force_on_windows;
|
|
|
597 |
$ON_WINDOWS = 0 if $opt_force_on_unix;
|
|
|
598 |
$opt_max_file_size = 100 unless $opt_max_file_size;
|
|
|
599 |
|
|
|
600 |
my @COUNT_DIFF_ARGV = undef;
|
|
|
601 |
my $COUNT_DIFF_report_file = undef;
|
|
|
602 |
if ($opt_count_diff) {
|
|
|
603 |
die "--count-and-diff requires two arguments; got ", scalar @ARGV, "\n"
|
|
|
604 |
if scalar @ARGV != 2;
|
|
|
605 |
# prefix with a dummy term so that $opt_count_diff is the
|
|
|
606 |
# index into @COUNT_DIFF_ARGV to work on at each pass
|
|
|
607 |
@COUNT_DIFF_ARGV = (undef, $ARGV[0],
|
|
|
608 |
$ARGV[1],
|
|
|
609 |
[$ARGV[0], $ARGV[1]]); # 3rd pass: diff them
|
|
|
610 |
$COUNT_DIFF_report_file = $opt_report_file if $opt_report_file;
|
|
|
611 |
}
|
|
|
612 |
|
|
|
613 |
# Options defaults:
|
|
|
614 |
$opt_progress_rate = 100 unless defined $opt_progress_rate;
|
|
|
615 |
$opt_progress_rate = 0 if defined $opt_quiet;
|
|
|
616 |
if (!defined $opt_v) {
|
|
|
617 |
$opt_v = 0;
|
|
|
618 |
} elsif (!$opt_v) {
|
|
|
619 |
$opt_v = 1;
|
|
|
620 |
}
|
|
|
621 |
if (defined $opt_xsl) {
|
|
|
622 |
$opt_xsl = $CLOC_XSL if $opt_xsl eq "1";
|
|
|
623 |
$opt_xml = 1;
|
|
|
624 |
}
|
|
|
625 |
my $skip_generate_report = 0;
|
|
|
626 |
$opt_sql_style = 0 unless defined $opt_sql_style;
|
|
|
627 |
$opt_sql = 0 unless $opt_sql_style or defined $opt_sql;
|
|
|
628 |
if ($opt_sql eq "-" || $opt_sql eq "1") { # stream SQL output to STDOUT
|
|
|
629 |
$opt_quiet = 1;
|
|
|
630 |
$skip_generate_report = 1;
|
|
|
631 |
$opt_by_file = 1;
|
|
|
632 |
$opt_sum_reports = 0;
|
|
|
633 |
$opt_progress_rate = 0;
|
|
|
634 |
} elsif ($opt_sql) { # write SQL output to a file
|
|
|
635 |
$opt_by_file = 1;
|
|
|
636 |
$skip_generate_report = 1;
|
|
|
637 |
$opt_sum_reports = 0;
|
|
|
638 |
}
|
|
|
639 |
if ($opt_sql_style) {
|
|
|
640 |
$opt_sql_style = lc $opt_sql_style;
|
|
|
641 |
if (!grep { lc $_ eq $opt_sql_style } qw ( Oracle )) {
|
|
|
642 |
die "'$opt_sql_style' is not a recognized SQL style.\n";
|
|
|
643 |
}
|
|
|
644 |
}
|
|
|
645 |
$opt_by_percent = '' unless defined $opt_by_percent;
|
|
|
646 |
if ($opt_by_percent and $opt_by_percent !~ m/^(c|cm|cb|cmb)$/i) {
|
|
|
647 |
die "--by-percent must be either 'c', 'cm', 'cb', or 'cmb'\n";
|
|
|
648 |
}
|
|
|
649 |
$opt_by_percent = lc $opt_by_percent;
|
|
|
650 |
|
|
|
651 |
die $usage unless defined $opt_version or
|
|
|
652 |
defined $opt_show_lang or
|
|
|
653 |
defined $opt_show_ext or
|
|
|
654 |
defined $opt_show_os or
|
|
|
655 |
defined $opt_write_lang_def or
|
|
|
656 |
defined $opt_list_file or
|
|
|
657 |
defined $opt_xsl or
|
|
|
658 |
defined $opt_explain or
|
|
|
659 |
scalar @ARGV >= 1;
|
|
|
660 |
die "--diff requires two arguments; got ", scalar @ARGV, "\n"
|
|
|
661 |
if $opt_diff and scalar @ARGV != 2;
|
|
|
662 |
if ($opt_version) {
|
|
|
663 |
printf "$VERSION\n";
|
|
|
664 |
exit;
|
|
|
665 |
}
|
|
|
666 |
# 1}}}
|
|
|
667 |
# Step 1: Initialize global constants. {{{1
|
|
|
668 |
#
|
|
|
669 |
my $nFiles_Found = 0; # updated in make_file_list
|
|
|
670 |
my (%Language_by_Extension, %Language_by_Script,
|
|
|
671 |
%Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename,
|
|
|
672 |
%Language_by_File, %Scale_Factor, %Known_Binary_Archives,
|
|
|
673 |
%EOL_Continuation_re,
|
|
|
674 |
);
|
|
|
675 |
my $ALREADY_SHOWED_HEADER = 0;
|
|
|
676 |
my $ALREADY_SHOWED_XML_SECTION = 0;
|
|
|
677 |
my %Error_Codes = ( 'Unable to read' => -1,
|
|
|
678 |
'Neither file nor directory' => -2,
|
|
|
679 |
'Diff error (quoted comments?)' => -3,
|
|
|
680 |
'Diff error, exceeded timeout' => -4,
|
|
|
681 |
);
|
|
|
682 |
if ($opt_force_lang_def) {
|
|
|
683 |
# replace cloc's definitions
|
|
|
684 |
read_lang_def(
|
|
|
685 |
$opt_force_lang_def , # Sample values:
|
|
|
686 |
\%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77'
|
|
|
687 |
\%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell'
|
|
|
688 |
\%Language_by_File , # Language_by_File{makefile} = 'make'
|
|
|
689 |
\%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] =
|
|
|
690 |
# [ 'remove_matches' , '^\s*#' ]
|
|
|
691 |
\%Not_Code_Extension , # Not_Code_Extension{jpg} = 1
|
|
|
692 |
\%Not_Code_Filename , # Not_Code_Filename{README} = 1
|
|
|
693 |
\%Scale_Factor , # Scale_Factor{Perl} = 4.0
|
|
|
694 |
\%EOL_Continuation_re , # EOL_Continuation_re{C++} = '\\$'
|
|
|
695 |
);
|
|
|
696 |
} else {
|
|
|
697 |
set_constants( #
|
|
|
698 |
\%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77'
|
|
|
699 |
\%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell'
|
|
|
700 |
\%Language_by_File , # Language_by_File{makefile} = 'make'
|
|
|
701 |
\%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] =
|
|
|
702 |
# [ 'remove_matches' , '^\s*#' ]
|
|
|
703 |
\%Not_Code_Extension , # Not_Code_Extension{jpg} = 1
|
|
|
704 |
\%Not_Code_Filename , # Not_Code_Filename{README} = 1
|
|
|
705 |
\%Scale_Factor , # Scale_Factor{Perl} = 4.0
|
|
|
706 |
\%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1
|
|
|
707 |
\%EOL_Continuation_re , # EOL_Continuation_re{C++} = '\\$'
|
|
|
708 |
);
|
|
|
709 |
}
|
|
|
710 |
if ($opt_read_lang_def) {
|
|
|
711 |
# augment cloc's definitions (keep cloc's where there are overlaps)
|
|
|
712 |
merge_lang_def(
|
|
|
713 |
$opt_read_lang_def , # Sample values:
|
|
|
714 |
\%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77'
|
|
|
715 |
\%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell'
|
|
|
716 |
\%Language_by_File , # Language_by_File{makefile} = 'make'
|
|
|
717 |
\%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] =
|
|
|
718 |
# [ 'remove_matches' , '^\s*#' ]
|
|
|
719 |
\%Not_Code_Extension , # Not_Code_Extension{jpg} = 1
|
|
|
720 |
\%Not_Code_Filename , # Not_Code_Filename{README} = 1
|
|
|
721 |
\%Scale_Factor , # Scale_Factor{Perl} = 4.0
|
|
|
722 |
\%EOL_Continuation_re , # EOL_Continuation_re{C++} = '\\$'
|
|
|
723 |
);
|
|
|
724 |
}
|
|
|
725 |
if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) {
|
|
|
726 |
die_unknown_lang($opt_lang_no_ext, "--lang-no-ext")
|
|
|
727 |
}
|
|
|
728 |
check_scale_existence(\%Filters_by_Language, \%Language_by_Extension,
|
|
|
729 |
\%Scale_Factor);
|
|
|
730 |
|
|
|
731 |
# Process command line provided extension-to-language mapping overrides.
|
|
|
732 |
# Make a hash of known languages in lower case for easier matching.
|
|
|
733 |
my %Recognized_Language_lc = (); # key = language name in lc, value = true name
|
|
|
734 |
foreach my $language (keys %Filters_by_Language) {
|
|
|
735 |
my $lang_lc = lc $language;
|
|
|
736 |
$Recognized_Language_lc{$lang_lc} = $language;
|
|
|
737 |
}
|
|
|
738 |
my %Forced_Extension = (); # file name extensions which user wants to count
|
|
|
739 |
my $All_One_Language = 0; # set to !0 if --force-lang's <ext> is missing
|
|
|
740 |
foreach my $pair (@opt_force_lang) {
|
|
|
741 |
my ($lang, $extension) = split(',', $pair);
|
|
|
742 |
my $lang_lc = lc $lang;
|
|
|
743 |
if (defined $extension) {
|
|
|
744 |
$Forced_Extension{$extension} = $lang;
|
|
|
745 |
|
|
|
746 |
die_unknown_lang($lang, "--force-lang")
|
|
|
747 |
unless $Recognized_Language_lc{$lang_lc};
|
|
|
748 |
|
|
|
749 |
$Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc};
|
|
|
750 |
} else {
|
|
|
751 |
# the scary case--count everything as this language
|
|
|
752 |
$All_One_Language = $Recognized_Language_lc{$lang_lc};
|
|
|
753 |
}
|
|
|
754 |
}
|
|
|
755 |
|
|
|
756 |
foreach my $pair (@opt_script_lang) {
|
|
|
757 |
my ($lang, $script_name) = split(',', $pair);
|
|
|
758 |
my $lang_lc = lc $lang;
|
|
|
759 |
if (!defined $script_name) {
|
|
|
760 |
die "The --script-lang option requires a comma separated pair of ".
|
|
|
761 |
"strings.\n";
|
|
|
762 |
}
|
|
|
763 |
|
|
|
764 |
die_unknown_lang($lang, "--script-lang")
|
|
|
765 |
unless $Recognized_Language_lc{$lang_lc};
|
|
|
766 |
|
|
|
767 |
$Language_by_Script{$script_name} = $Recognized_Language_lc{$lang_lc};
|
|
|
768 |
}
|
|
|
769 |
|
|
|
770 |
# If user provided file extensions to ignore, add these to
|
|
|
771 |
# the exclusion list.
|
|
|
772 |
foreach my $ext (map { $_ => 1 } split(/,/, $opt_exclude_ext ) ) {
|
|
|
773 |
$ext = lc $ext if $ON_WINDOWS;
|
|
|
774 |
$Not_Code_Extension{$ext} = 1;
|
|
|
775 |
}
|
|
|
776 |
|
|
|
777 |
# If SQL or --by-file output is requested, keep track of directory names
|
|
|
778 |
# generated by File::Temp::tempdir and used to temporarily hold the results
|
|
|
779 |
# of compressed archives. Contents of the SQL table 't' will be much
|
|
|
780 |
# cleaner if these meaningless directory names are stripped from the front
|
|
|
781 |
# of files pulled from the archives.
|
|
|
782 |
my %TEMP_DIR = ();
|
|
|
783 |
my $TEMP_OFF = 0; # Needed for --sdir; keep track of the number of
|
|
|
784 |
# scratch directories made in this run to avoid
|
|
|
785 |
# file overwrites by multiple extractions to same
|
|
|
786 |
# sdir.
|
|
|
787 |
# Also track locations where temporary installations, if necessary, of
|
|
|
788 |
# Algorithm::Diff and/or Regexp::Common are done. Make sure these
|
|
|
789 |
# directories are not counted as inputs (ref bug #80 2012-11-23).
|
|
|
790 |
my %TEMP_INST = ();
|
|
|
791 |
|
|
|
792 |
# invert %Language_by_Script hash to get an easy-to-look-up list of known
|
|
|
793 |
# scripting languages
|
|
|
794 |
my %Script_Language = map { $_ => 1 } values %Language_by_Script ;
|
|
|
795 |
# 1}}}
|
|
|
796 |
# Step 2: Early exits for display, summation. {{{1
|
|
|
797 |
#
|
|
|
798 |
print_extension_info( $opt_show_ext ) if defined $opt_show_ext ;
|
|
|
799 |
print_language_info( $opt_show_lang, '') if defined $opt_show_lang;
|
|
|
800 |
print_language_filters( $opt_explain ) if defined $opt_explain ;
|
|
|
801 |
exit if (defined $opt_show_ext) or
|
|
|
802 |
(defined $opt_show_lang) or
|
|
|
803 |
(defined $opt_explain);
|
|
|
804 |
|
|
|
805 |
Top_of_Processing_Loop:
|
|
|
806 |
# Sorry, coding purists. Using a goto to implement --count-and-diff
|
|
|
807 |
# which has to do three passes over the main code, starting with
|
|
|
808 |
# a clean slate each time.
|
|
|
809 |
if ($opt_count_diff) {
|
|
|
810 |
@ARGV = ( $COUNT_DIFF_ARGV[ $opt_count_diff ] );
|
|
|
811 |
if ($opt_count_diff == 3) {
|
|
|
812 |
$opt_diff = 1;
|
|
|
813 |
@ARGV = @{$COUNT_DIFF_ARGV[ $opt_count_diff ]}; # last arg is list of list
|
|
|
814 |
}
|
|
|
815 |
if ($opt_report_file) {
|
|
|
816 |
# Instead of just one output file, will have three.
|
|
|
817 |
# Keep their names unique otherwise results are clobbered.
|
|
|
818 |
if ($opt_count_diff == 3) {
|
|
|
819 |
$opt_report_file = $COUNT_DIFF_report_file . ".diff.$ARGV[0].$ARGV[1]";
|
|
|
820 |
} else {
|
|
|
821 |
$opt_report_file = $COUNT_DIFF_report_file . "." . $ARGV[0];
|
|
|
822 |
}
|
|
|
823 |
} else {
|
|
|
824 |
# STDOUT; print a header showing what it's working on
|
|
|
825 |
if ($opt_count_diff == 3) {
|
|
|
826 |
print "\ndiff $ARGV[0] $ARGV[1]::\n";
|
|
|
827 |
} else {
|
|
|
828 |
print "\n" if $opt_count_diff > 1;
|
|
|
829 |
print "$ARGV[0]::\n";
|
|
|
830 |
}
|
|
|
831 |
}
|
|
|
832 |
$ALREADY_SHOWED_HEADER = 0;
|
|
|
833 |
$ALREADY_SHOWED_XML_SECTION = 0;
|
|
|
834 |
}
|
|
|
835 |
|
|
|
836 |
#print "Before glob have [", join(",", @ARGV), "]\n";
|
|
|
837 |
@ARGV = windows_glob(@ARGV) if $ON_WINDOWS;
|
|
|
838 |
#print "after glob have [", join(",", @ARGV), "]\n";
|
|
|
839 |
|
|
|
840 |
# filter out archive files if requested to do so
|
|
|
841 |
if (defined $opt_skip_archive) {
|
|
|
842 |
my @non_archive = ();
|
|
|
843 |
foreach my $candidate (@ARGV) {
|
|
|
844 |
if ($candidate !~ m/${opt_skip_archive}$/) {
|
|
|
845 |
push @non_archive, $candidate;
|
|
|
846 |
|
|
|
847 |
}
|
|
|
848 |
}
|
|
|
849 |
@ARGV = @non_archive;
|
|
|
850 |
}
|
|
|
851 |
|
|
|
852 |
if ($opt_sum_reports and $opt_diff) {
|
|
|
853 |
my @results = ();
|
|
|
854 |
if ($opt_list_file) { # read inputs from the list file
|
|
|
855 |
my @list = read_list_file($opt_list_file);
|
|
|
856 |
@results = combine_diffs(\@list);
|
|
|
857 |
} else { # get inputs from the command line
|
|
|
858 |
@results = combine_diffs(\@ARGV);
|
|
|
859 |
}
|
|
|
860 |
if ($opt_report_file) {
|
|
|
861 |
write_file($opt_report_file, @results);
|
|
|
862 |
} else {
|
|
|
863 |
print "\n", join("\n", @results), "\n";
|
|
|
864 |
}
|
|
|
865 |
exit;
|
|
|
866 |
}
|
|
|
867 |
if ($opt_sum_reports) {
|
|
|
868 |
my %Results = ();
|
|
|
869 |
foreach my $type( "by language", "by report file" ) {
|
|
|
870 |
my $found_lang = undef;
|
|
|
871 |
if ($opt_list_file) { # read inputs from the list file
|
|
|
872 |
my @list = read_list_file($opt_list_file);
|
|
|
873 |
$found_lang = combine_results(\@list,
|
|
|
874 |
$type,
|
|
|
875 |
\%{$Results{ $type }},
|
|
|
876 |
\%Filters_by_Language );
|
|
|
877 |
} else { # get inputs from the command line
|
|
|
878 |
$found_lang = combine_results(\@ARGV,
|
|
|
879 |
$type,
|
|
|
880 |
\%{$Results{ $type }},
|
|
|
881 |
\%Filters_by_Language );
|
|
|
882 |
}
|
|
|
883 |
next unless %Results;
|
|
|
884 |
my $end_time = get_time();
|
|
|
885 |
my @results = generate_report($VERSION, $end_time - $start_time,
|
|
|
886 |
$type,
|
|
|
887 |
\%{$Results{ $type }}, \%Scale_Factor);
|
|
|
888 |
if ($opt_report_file) {
|
|
|
889 |
my $ext = ".lang";
|
|
|
890 |
$ext = ".file" unless $type eq "by language";
|
|
|
891 |
next if !$found_lang and $ext eq ".lang";
|
|
|
892 |
write_file($opt_report_file . $ext, @results);
|
|
|
893 |
} else {
|
|
|
894 |
print "\n", join("\n", @results), "\n";
|
|
|
895 |
}
|
|
|
896 |
}
|
|
|
897 |
exit;
|
|
|
898 |
}
|
|
|
899 |
if ($opt_write_lang_def) {
|
|
|
900 |
write_lang_def($opt_write_lang_def ,
|
|
|
901 |
\%Language_by_Extension,
|
|
|
902 |
\%Language_by_Script ,
|
|
|
903 |
\%Language_by_File ,
|
|
|
904 |
\%Filters_by_Language ,
|
|
|
905 |
\%Not_Code_Extension ,
|
|
|
906 |
\%Not_Code_Filename ,
|
|
|
907 |
\%Scale_Factor ,
|
|
|
908 |
\%EOL_Continuation_re ,
|
|
|
909 |
);
|
|
|
910 |
exit;
|
|
|
911 |
}
|
|
|
912 |
if ($opt_show_os) {
|
|
|
913 |
if ($ON_WINDOWS) {
|
|
|
914 |
print "Windows\n";
|
|
|
915 |
} else {
|
|
|
916 |
print "UNIX\n";
|
|
|
917 |
}
|
|
|
918 |
exit;
|
|
|
919 |
}
|
|
|
920 |
# 1}}}
|
|
|
921 |
# Step 3: Create a list of files to consider. {{{1
|
|
|
922 |
# a) If inputs are binary archives, first cd to a temp
|
|
|
923 |
# directory, expand the archive with the user-given
|
|
|
924 |
# extraction tool, then add the temp directory to
|
|
|
925 |
# the list of dirs to process.
|
|
|
926 |
# b) Create a list of every file that might contain source
|
|
|
927 |
# code. Ignore binary files, zero-sized files, and
|
|
|
928 |
# any file in a directory the user says to exclude.
|
|
|
929 |
# c) Determine the language for each file in the list.
|
|
|
930 |
#
|
|
|
931 |
my @binary_archive = ();
|
|
|
932 |
my $cwd = cwd();
|
|
|
933 |
if ($opt_extract_with) {
|
|
|
934 |
#print "cwd main = [$cwd]\n";
|
|
|
935 |
my @extract_location = ();
|
|
|
936 |
foreach my $bin_file (@ARGV) {
|
|
|
937 |
my $extract_dir = undef;
|
|
|
938 |
if ($opt_sdir) {
|
|
|
939 |
++$TEMP_OFF;
|
|
|
940 |
$extract_dir = "$opt_sdir/$TEMP_OFF";
|
|
|
941 |
File::Path::rmtree($extract_dir) if is_dir($extract_dir);
|
|
|
942 |
File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
|
|
|
943 |
} else {
|
|
|
944 |
$extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
|
|
|
945 |
}
|
|
|
946 |
$TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
|
|
|
947 |
print "mkdir $extract_dir\n" if $opt_v;
|
|
|
948 |
print "cd $extract_dir\n" if $opt_v;
|
|
|
949 |
chdir $extract_dir;
|
|
|
950 |
my $bin_file_full_path = "";
|
|
|
951 |
if (File::Spec->file_name_is_absolute( $bin_file )) {
|
|
|
952 |
$bin_file_full_path = $bin_file;
|
|
|
953 |
#print "bin_file_full_path (was ful) = [$bin_file_full_path]\n";
|
|
|
954 |
} else {
|
|
|
955 |
$bin_file_full_path = File::Spec->catfile( $cwd, $bin_file );
|
|
|
956 |
#print "bin_file_full_path (was rel) = [$bin_file_full_path]\n";
|
|
|
957 |
}
|
|
|
958 |
my $extract_cmd = uncompress_archive_cmd($bin_file_full_path);
|
|
|
959 |
print $extract_cmd, "\n" if $opt_v;
|
|
|
960 |
system $extract_cmd;
|
|
|
961 |
push @extract_location, $extract_dir;
|
|
|
962 |
chdir $cwd;
|
|
|
963 |
}
|
|
|
964 |
# It is possible that the binary archive itself contains additional
|
|
|
965 |
# files compressed the same way (true for Java .ear files). Go
|
|
|
966 |
# through all the files that were extracted, see if they are binary
|
|
|
967 |
# archives and try to extract them. Lather, rinse, repeat.
|
|
|
968 |
my $binary_archives_exist = 1;
|
|
|
969 |
my $count_binary_archives = 0;
|
|
|
970 |
my $previous_count = 0;
|
|
|
971 |
my $n_pass = 0;
|
|
|
972 |
while ($binary_archives_exist) {
|
|
|
973 |
@binary_archive = ();
|
|
|
974 |
foreach my $dir (@extract_location) {
|
|
|
975 |
find(\&archive_files, $dir); # populates global @binary_archive
|
|
|
976 |
}
|
|
|
977 |
foreach my $archive (@binary_archive) {
|
|
|
978 |
my $extract_dir = undef;
|
|
|
979 |
if ($opt_sdir) {
|
|
|
980 |
++$TEMP_OFF;
|
|
|
981 |
$extract_dir = "$opt_sdir/$TEMP_OFF";
|
|
|
982 |
File::Path::rmtree($extract_dir) if is_dir($extract_dir);
|
|
|
983 |
File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
|
|
|
984 |
} else {
|
|
|
985 |
$extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
|
|
|
986 |
}
|
|
|
987 |
$TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
|
|
|
988 |
print "mkdir $extract_dir\n" if $opt_v;
|
|
|
989 |
print "cd $extract_dir\n" if $opt_v;
|
|
|
990 |
chdir $extract_dir;
|
|
|
991 |
|
|
|
992 |
my $extract_cmd = uncompress_archive_cmd($archive);
|
|
|
993 |
print $extract_cmd, "\n" if $opt_v;
|
|
|
994 |
system $extract_cmd;
|
|
|
995 |
push @extract_location, $extract_dir;
|
|
|
996 |
unlink $archive; # otherwise will be extracting it forever
|
|
|
997 |
}
|
|
|
998 |
$count_binary_archives = scalar @binary_archive;
|
|
|
999 |
if ($count_binary_archives == $previous_count) {
|
|
|
1000 |
$binary_archives_exist = 0;
|
|
|
1001 |
}
|
|
|
1002 |
$previous_count = $count_binary_archives;
|
|
|
1003 |
}
|
|
|
1004 |
chdir $cwd;
|
|
|
1005 |
|
|
|
1006 |
@ARGV = @extract_location;
|
|
|
1007 |
} else {
|
|
|
1008 |
# see if any of the inputs need to be auto-uncompressed &/or expanded
|
|
|
1009 |
my @updated_ARGS = ();
|
|
|
1010 |
foreach my $Arg (@ARGV) {
|
|
|
1011 |
if (is_dir($Arg)) {
|
|
|
1012 |
push @updated_ARGS, $Arg;
|
|
|
1013 |
next;
|
|
|
1014 |
}
|
|
|
1015 |
my $full_path = "";
|
|
|
1016 |
if (File::Spec->file_name_is_absolute( $Arg )) {
|
|
|
1017 |
$full_path = $Arg;
|
|
|
1018 |
} else {
|
|
|
1019 |
$full_path = File::Spec->catfile( $cwd, $Arg );
|
|
|
1020 |
}
|
|
|
1021 |
#print "full_path = [$full_path]\n";
|
|
|
1022 |
my $extract_cmd = uncompress_archive_cmd($full_path);
|
|
|
1023 |
if ($extract_cmd) {
|
|
|
1024 |
my $extract_dir = undef;
|
|
|
1025 |
if ($opt_sdir) {
|
|
|
1026 |
++$TEMP_OFF;
|
|
|
1027 |
$extract_dir = "$opt_sdir/$TEMP_OFF";
|
|
|
1028 |
File::Path::rmtree($extract_dir) if is_dir($extract_dir);
|
|
|
1029 |
File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
|
|
|
1030 |
} else {
|
|
|
1031 |
$extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
|
|
|
1032 |
}
|
|
|
1033 |
$TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
|
|
|
1034 |
print "mkdir $extract_dir\n" if $opt_v;
|
|
|
1035 |
print "cd $extract_dir\n" if $opt_v;
|
|
|
1036 |
chdir $extract_dir;
|
|
|
1037 |
print $extract_cmd, "\n" if $opt_v;
|
|
|
1038 |
system $extract_cmd;
|
|
|
1039 |
push @updated_ARGS, $extract_dir;
|
|
|
1040 |
chdir $cwd;
|
|
|
1041 |
} else {
|
|
|
1042 |
# this is a conventional, uncompressed, unarchived file
|
|
|
1043 |
# or a directory; keep as-is
|
|
|
1044 |
push @updated_ARGS, $Arg;
|
|
|
1045 |
}
|
|
|
1046 |
}
|
|
|
1047 |
@ARGV = @updated_ARGS;
|
|
|
1048 |
|
|
|
1049 |
# make sure we're not counting any directory containing
|
|
|
1050 |
# temporary installations of Regexp::Common, Algorithm::Diff
|
|
|
1051 |
foreach my $d (sort keys %TEMP_INST) {
|
|
|
1052 |
foreach my $a (@ARGV) {
|
|
|
1053 |
next unless is_dir($a);
|
|
|
1054 |
if ($opt_v > 2) {
|
|
|
1055 |
printf "Comparing %s (location of %s) to input [%s]\n",
|
|
|
1056 |
$d, $TEMP_INST{$d}, $a;
|
|
|
1057 |
}
|
|
|
1058 |
if ($a eq $d) {
|
|
|
1059 |
die "File::Temp::tempdir chose directory ",
|
|
|
1060 |
$d, " to install ", $TEMP_INST{$d}, " but this ",
|
|
|
1061 |
"matches one of your input directories. Rerun ",
|
|
|
1062 |
"with --sdir and supply a different temporary ",
|
|
|
1063 |
"directory for ", $TEMP_INST{$d}, "\n";
|
|
|
1064 |
}
|
|
|
1065 |
}
|
|
|
1066 |
}
|
|
|
1067 |
}
|
|
|
1068 |
# 1}}}
|
|
|
1069 |
my @Errors = ();
|
|
|
1070 |
my @file_list = (); # global variable updated in files()
|
|
|
1071 |
my %Ignored = (); # files that are not counted (language not recognized or
|
|
|
1072 |
# problems reading the file)
|
|
|
1073 |
my @Lines_Out = ();
|
|
|
1074 |
if ($opt_diff) {
|
|
|
1075 |
# Step 4: Separate code from non-code files. {{{1
|
|
|
1076 |
my @fh = ();
|
|
|
1077 |
my @files_for_set = ();
|
|
|
1078 |
# make file lists for each separate argument
|
|
|
1079 |
for (my $i = 0; $i < scalar @ARGV; $i++) {
|
|
|
1080 |
push @fh,
|
|
|
1081 |
make_file_list([ $ARGV[$i] ], \%Error_Codes, \@Errors, \%Ignored);
|
|
|
1082 |
@{$files_for_set[$i]} = @file_list;
|
|
|
1083 |
if ($opt_exclude_list_file) {
|
|
|
1084 |
# note: process_exclude_list_file() references global @file_list
|
|
|
1085 |
process_exclude_list_file($opt_exclude_list_file,
|
|
|
1086 |
\%Exclude_Dir,
|
|
|
1087 |
\%Ignored);
|
|
|
1088 |
}
|
|
|
1089 |
@file_list = ();
|
|
|
1090 |
}
|
|
|
1091 |
# 1}}}
|
|
|
1092 |
# Step 5: Remove duplicate files. {{{1
|
|
|
1093 |
#
|
|
|
1094 |
my %Language = ();
|
|
|
1095 |
my %unique_source_file = ();
|
|
|
1096 |
my $n_set = 0;
|
|
|
1097 |
foreach my $FH (@fh) { # loop over each pair of file sets
|
|
|
1098 |
++$n_set;
|
|
|
1099 |
remove_duplicate_files($FH,
|
|
|
1100 |
\%{$Language{$FH}} ,
|
|
|
1101 |
\%{$unique_source_file{$FH}} ,
|
|
|
1102 |
\%Error_Codes ,
|
|
|
1103 |
\@Errors ,
|
|
|
1104 |
\%Ignored );
|
|
|
1105 |
printf "%2d: %8d unique file%s. \r",
|
|
|
1106 |
$n_set,
|
|
|
1107 |
plural_form(scalar keys %unique_source_file)
|
|
|
1108 |
unless $opt_quiet;
|
|
|
1109 |
}
|
|
|
1110 |
# 1}}}
|
|
|
1111 |
# Step 6: Count code, comments, blank lines. {{{1
|
|
|
1112 |
#
|
|
|
1113 |
my %Results_by_Language = ();
|
|
|
1114 |
my %Results_by_File = ();
|
|
|
1115 |
my %Delta_by_Language = ();
|
|
|
1116 |
my %Delta_by_File = ();
|
|
|
1117 |
|
|
|
1118 |
foreach (my $F = 0; $F < scalar @fh - 1; $F++) {
|
|
|
1119 |
# loop over file sets; do diff between set $F to $F+1
|
|
|
1120 |
|
|
|
1121 |
my $nCounted = 0;
|
|
|
1122 |
|
|
|
1123 |
my @file_pairs = ();
|
|
|
1124 |
my @files_added = ();
|
|
|
1125 |
my @files_removed = ();
|
|
|
1126 |
|
|
|
1127 |
align_by_pairs(\%{$unique_source_file{$fh[$F ]}} , # in
|
|
|
1128 |
\%{$unique_source_file{$fh[$F+1]}} , # in
|
|
|
1129 |
\@files_added , # out
|
|
|
1130 |
\@files_removed , # out
|
|
|
1131 |
\@file_pairs , # out
|
|
|
1132 |
);
|
|
|
1133 |
my %already_counted = (); # already_counted{ filename } = 1
|
|
|
1134 |
|
|
|
1135 |
if (!@file_pairs) {
|
|
|
1136 |
# Special case where all files were either added or deleted.
|
|
|
1137 |
# In this case, one of these arrays will be empty:
|
|
|
1138 |
# @files_added, @files_removed
|
|
|
1139 |
# so loop over both to cover both cases.
|
|
|
1140 |
my $status = @files_added ? 'added' : 'removed';
|
|
|
1141 |
my $offset = @files_added ? 1 : 0 ;
|
|
|
1142 |
foreach my $file (@files_added, @files_removed) {
|
|
|
1143 |
next unless defined $Language{$fh[$F+$offset]}{$file};
|
|
|
1144 |
my $Lang = $Language{$fh[$F+$offset]}{$file};
|
|
|
1145 |
next if $Lang eq '(unknown)';
|
|
|
1146 |
my ($all_line_count,
|
|
|
1147 |
$blank_count ,
|
|
|
1148 |
$comment_count ,
|
|
|
1149 |
) = call_counter($file, $Lang, \@Errors);
|
|
|
1150 |
$already_counted{$file} = 1;
|
|
|
1151 |
my $code_count = $all_line_count-$blank_count-$comment_count;
|
|
|
1152 |
if ($opt_by_file) {
|
|
|
1153 |
$Delta_by_File{$file}{'code' }{$status} += $code_count ;
|
|
|
1154 |
$Delta_by_File{$file}{'blank' }{$status} += $blank_count ;
|
|
|
1155 |
$Delta_by_File{$file}{'comment'}{$status} += $comment_count;
|
|
|
1156 |
$Delta_by_File{$file}{'lang' }{$status} = $Lang ;
|
|
|
1157 |
$Delta_by_File{$file}{'nFiles' }{$status} += 1 ;
|
|
|
1158 |
}
|
|
|
1159 |
$Delta_by_Language{$Lang}{'code' }{$status} += $code_count ;
|
|
|
1160 |
$Delta_by_Language{$Lang}{'blank' }{$status} += $blank_count ;
|
|
|
1161 |
$Delta_by_Language{$Lang}{'comment'}{$status} += $comment_count;
|
|
|
1162 |
$Delta_by_Language{$Lang}{'nFiles' }{$status} += 1 ;
|
|
|
1163 |
}
|
|
|
1164 |
}
|
|
|
1165 |
#use Data::Dumper::Simple;
|
|
|
1166 |
#use Data::Dumper;
|
|
|
1167 |
#print Dumper(\@files_added, \@files_removed, \@file_pairs);
|
|
|
1168 |
my @alignment = (); # only used if --diff-alignment
|
|
|
1169 |
#print "after align_by_pairs:\n";
|
|
|
1170 |
|
|
|
1171 |
#print "added:\n";
|
|
|
1172 |
push @alignment, sprintf "Files added: %d\n", scalar @files_added
|
|
|
1173 |
if $opt_diff_alignment;
|
|
|
1174 |
foreach my $f (@files_added) {
|
|
|
1175 |
next if $already_counted{$f};
|
|
|
1176 |
#printf "%10s -> %s\n", $f, $Language{$fh[$F+1]}{$f};
|
|
|
1177 |
# Don't proceed unless the file (both L and R versions)
|
|
|
1178 |
# is in a known language.
|
|
|
1179 |
next if $opt_include_lang
|
|
|
1180 |
and not $Include_Language{$Language{$fh[$F+1]}{$f}};
|
|
|
1181 |
next if $Language{$fh[$F+1]}{$f} eq "(unknown)";
|
|
|
1182 |
next if $Exclude_Language{$Language{$fh[$F+1]}{$f}};
|
|
|
1183 |
push @alignment, sprintf " + %s ; %s\n", $f, $Language{$fh[$F+1]}{$f}
|
|
|
1184 |
if $opt_diff_alignment;
|
|
|
1185 |
++$Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'nFiles'}{'added'};
|
|
|
1186 |
# Additionally, add contents of file $f to
|
|
|
1187 |
# Delta_by_File{$f}{comment/blank/code}{'added'}
|
|
|
1188 |
# Delta_by_Language{$lang}{comment/blank/code}{'added'}
|
|
|
1189 |
my ($all_line_count,
|
|
|
1190 |
$blank_count ,
|
|
|
1191 |
$comment_count ,
|
|
|
1192 |
) = call_counter($f, $Language{$fh[$F+1]}{$f}, \@Errors);
|
|
|
1193 |
$Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'comment'}{'added'} +=
|
|
|
1194 |
$comment_count;
|
|
|
1195 |
$Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'blank'}{'added'} +=
|
|
|
1196 |
$blank_count;
|
|
|
1197 |
$Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'code'}{'added'} +=
|
|
|
1198 |
$all_line_count - $blank_count - $comment_count;
|
|
|
1199 |
$Delta_by_File{ $f }{'comment'}{'added'} = $comment_count;
|
|
|
1200 |
$Delta_by_File{ $f }{'blank'}{'added'} = $blank_count;
|
|
|
1201 |
$Delta_by_File{ $f }{'code'}{'added'} =
|
|
|
1202 |
$all_line_count - $blank_count - $comment_count;
|
|
|
1203 |
}
|
|
|
1204 |
push @alignment, "\n";
|
|
|
1205 |
|
|
|
1206 |
#print "removed:\n";
|
|
|
1207 |
push @alignment, sprintf "Files removed: %d\n", scalar @files_removed
|
|
|
1208 |
if $opt_diff_alignment;
|
|
|
1209 |
foreach my $f (@files_removed) {
|
|
|
1210 |
next if $already_counted{$f};
|
|
|
1211 |
# Don't proceed unless the file (both L and R versions)
|
|
|
1212 |
# is in a known language.
|
|
|
1213 |
next if $opt_include_lang
|
|
|
1214 |
and not $Include_Language{$Language{$fh[$F]}{$f}};
|
|
|
1215 |
next if $Language{$fh[$F]}{$f} eq "(unknown)";
|
|
|
1216 |
next if $Exclude_Language{$Language{$fh[$F]}{$f}};
|
|
|
1217 |
++$Delta_by_Language{ $Language{$fh[$F]}{$f} }{'nFiles'}{'removed'};
|
|
|
1218 |
push @alignment, sprintf " - %s ; %s\n", $f, $Language{$fh[$F]}{$f}
|
|
|
1219 |
if $opt_diff_alignment;
|
|
|
1220 |
#printf "%10s -> %s\n", $f, $Language{$fh[$F ]}{$f};
|
|
|
1221 |
# Additionally, add contents of file $f to
|
|
|
1222 |
# Delta_by_File{$f}{comment/blank/code}{'removed'}
|
|
|
1223 |
# Delta_by_Language{$lang}{comment/blank/code}{'removed'}
|
|
|
1224 |
my ($all_line_count,
|
|
|
1225 |
$blank_count ,
|
|
|
1226 |
$comment_count ,
|
|
|
1227 |
) = call_counter($f, $Language{$fh[$F ]}{$f}, \@Errors);
|
|
|
1228 |
$Delta_by_Language{ $Language{$fh[$F ]}{$f} }{'comment'}{'removed'} +=
|
|
|
1229 |
$comment_count;
|
|
|
1230 |
$Delta_by_Language{ $Language{$fh[$F ]}{$f} }{'blank'}{'removed'} +=
|
|
|
1231 |
$blank_count;
|
|
|
1232 |
$Delta_by_Language{ $Language{$fh[$F ]}{$f} }{'code'}{'removed'} +=
|
|
|
1233 |
$all_line_count - $blank_count - $comment_count;
|
|
|
1234 |
$Delta_by_File{ $f }{'comment'}{'removed'} = $comment_count;
|
|
|
1235 |
$Delta_by_File{ $f }{'blank'}{'removed'} = $blank_count;
|
|
|
1236 |
$Delta_by_File{ $f }{'code'}{'removed'} =
|
|
|
1237 |
$all_line_count - $blank_count - $comment_count;
|
|
|
1238 |
}
|
|
|
1239 |
push @alignment, "\n";
|
|
|
1240 |
|
|
|
1241 |
my $alignment_pairs_index = scalar @alignment;
|
|
|
1242 |
my $n_file_pairs_compared = 0;
|
|
|
1243 |
# Don't know ahead of time how many file pairs will be compared
|
|
|
1244 |
# since duplicates are weeded out below. The answer is
|
|
|
1245 |
# scalar @file_pairs only if there are no duplicates.
|
|
|
1246 |
push @alignment, sprintf "File pairs compared: UPDATE_ME\n"
|
|
|
1247 |
if $opt_diff_alignment;
|
|
|
1248 |
|
|
|
1249 |
foreach my $pair (@file_pairs) {
|
|
|
1250 |
my $file_L = $pair->[0];
|
|
|
1251 |
my $file_R = $pair->[1];
|
|
|
1252 |
my $Lang_L = $Language{$fh[$F ]}{$file_L};
|
|
|
1253 |
my $Lang_R = $Language{$fh[$F+1]}{$file_R};
|
|
|
1254 |
#print "main step 6 file_L=$file_L file_R=$file_R\n";
|
|
|
1255 |
++$nCounted;
|
|
|
1256 |
printf "Counting: %d\r", $nCounted
|
|
|
1257 |
unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate));
|
|
|
1258 |
next if $Ignored{$file_L};
|
|
|
1259 |
# filter out non-included languages
|
|
|
1260 |
if ($opt_include_lang and not $Include_Language{$Lang_L}
|
|
|
1261 |
and not $Include_Language{$Lang_R}) {
|
|
|
1262 |
$Ignored{$file_L} = "--include-lang=$Lang_L";
|
|
|
1263 |
$Ignored{$file_R} = "--include-lang=$Lang_R";
|
|
|
1264 |
next;
|
|
|
1265 |
}
|
|
|
1266 |
# filter out excluded or unrecognized languages
|
|
|
1267 |
if ($Exclude_Language{$Lang_L} or $Exclude_Language{$Lang_R}) {
|
|
|
1268 |
$Ignored{$file_L} = "--exclude-lang=$Lang_L";
|
|
|
1269 |
$Ignored{$file_R} = "--exclude-lang=$Lang_R";
|
|
|
1270 |
next;
|
|
|
1271 |
}
|
|
|
1272 |
my $not_Filters_by_Language_Lang_LR = 0;
|
|
|
1273 |
#print "file_LR = [$file_L] [$file_R]\n";
|
|
|
1274 |
#print "Lang_LR = [$Lang_L] [$Lang_R]\n";
|
|
|
1275 |
if (!(@{$Filters_by_Language{$Lang_L} }) or
|
|
|
1276 |
!(@{$Filters_by_Language{$Lang_R} })) {
|
|
|
1277 |
$not_Filters_by_Language_Lang_LR = 1;
|
|
|
1278 |
}
|
|
|
1279 |
if ($not_Filters_by_Language_Lang_LR) {
|
|
|
1280 |
if (($Lang_L eq "(unknown)") or ($Lang_R eq "(unknown)")) {
|
|
|
1281 |
$Ignored{$fh[$F ]}{$file_L} = "language unknown (#1)";
|
|
|
1282 |
$Ignored{$fh[$F+1]}{$file_R} = "language unknown (#1)";
|
|
|
1283 |
} else {
|
|
|
1284 |
$Ignored{$fh[$F ]}{$file_L} = "missing Filters_by_Language{$Lang_L}";
|
|
|
1285 |
$Ignored{$fh[$F+1]}{$file_R} = "missing Filters_by_Language{$Lang_R}";
|
|
|
1286 |
}
|
|
|
1287 |
next;
|
|
|
1288 |
}
|
|
|
1289 |
|
|
|
1290 |
#print "DIFF($file_L, $file_R)\n";
|
|
|
1291 |
# step 0: compare the two files' contents
|
|
|
1292 |
chomp ( my @lines_L = read_file($file_L) );
|
|
|
1293 |
chomp ( my @lines_R = read_file($file_R) );
|
|
|
1294 |
my $language_file_L = "";
|
|
|
1295 |
if (defined $Language{$fh[$F]}{$file_L}) {
|
|
|
1296 |
$language_file_L = $Language{$fh[$F]}{$file_L};
|
|
|
1297 |
} else {
|
|
|
1298 |
# files $file_L and $file_R do not contain known language
|
|
|
1299 |
next;
|
|
|
1300 |
}
|
|
|
1301 |
my $contents_are_same = 1;
|
|
|
1302 |
if (scalar @lines_L == scalar @lines_R) {
|
|
|
1303 |
# same size, must compare line-by-line
|
|
|
1304 |
for (my $i = 0; $i < scalar @lines_L; $i++) {
|
|
|
1305 |
if ($lines_L[$i] ne $lines_R[$i]) {
|
|
|
1306 |
$contents_are_same = 0;
|
|
|
1307 |
last;
|
|
|
1308 |
}
|
|
|
1309 |
}
|
|
|
1310 |
if ($contents_are_same) {
|
|
|
1311 |
++$Delta_by_Language{$language_file_L}{'nFiles'}{'same'};
|
|
|
1312 |
} else {
|
|
|
1313 |
++$Delta_by_Language{$language_file_L}{'nFiles'}{'modified'};
|
|
|
1314 |
}
|
|
|
1315 |
} else {
|
|
|
1316 |
$contents_are_same = 0;
|
|
|
1317 |
# different sizes, contents have changed
|
|
|
1318 |
++$Delta_by_Language{$language_file_L}{'nFiles'}{'modified'};
|
|
|
1319 |
}
|
|
|
1320 |
if ($opt_diff_alignment) {
|
|
|
1321 |
my $str = "$file_L | $file_R ; $language_file_L";
|
|
|
1322 |
if ($contents_are_same) {
|
|
|
1323 |
push @alignment, " == $str";
|
|
|
1324 |
} else {
|
|
|
1325 |
push @alignment, " != $str";
|
|
|
1326 |
}
|
|
|
1327 |
++$n_file_pairs_compared;
|
|
|
1328 |
}
|
|
|
1329 |
|
|
|
1330 |
# step 1: identify comments in both files
|
|
|
1331 |
#print "Diff blank removal L language= $Lang_L";
|
|
|
1332 |
#print " scalar(lines_L)=", scalar @lines_L, "\n";
|
|
|
1333 |
my @original_minus_blanks_L
|
|
|
1334 |
= rm_blanks( \@lines_L, $Lang_L, \%EOL_Continuation_re);
|
|
|
1335 |
#print "1: scalar(original_minus_blanks_L)=", scalar @original_minus_blanks_L, "\n";
|
|
|
1336 |
@lines_L = @original_minus_blanks_L;
|
|
|
1337 |
#print "2: scalar(lines_L)=", scalar @lines_L, "\n";
|
|
|
1338 |
@lines_L = add_newlines(\@lines_L); # compensate for rm_comments()
|
|
|
1339 |
@lines_L = rm_comments( \@lines_L, $Lang_L, $file_L,
|
|
|
1340 |
\%EOL_Continuation_re);
|
|
|
1341 |
#print "3: scalar(lines_L)=", scalar @lines_L, "\n";
|
|
|
1342 |
|
|
|
1343 |
#print "Diff blank removal R language= $Lang_R\n";
|
|
|
1344 |
my @original_minus_blanks_R
|
|
|
1345 |
= rm_blanks( \@lines_R, $Lang_R, \%EOL_Continuation_re);
|
|
|
1346 |
@lines_R = @original_minus_blanks_R;
|
|
|
1347 |
@lines_R = add_newlines(\@lines_R); # taken away by rm_comments()
|
|
|
1348 |
@lines_R = rm_comments( \@lines_R, $Lang_R, $file_R,
|
|
|
1349 |
\%EOL_Continuation_re);
|
|
|
1350 |
|
|
|
1351 |
my (@diff_LL, @diff_LR, );
|
|
|
1352 |
array_diff( $file_L , # in
|
|
|
1353 |
\@original_minus_blanks_L , # in
|
|
|
1354 |
\@lines_L , # in
|
|
|
1355 |
"comment" , # in
|
|
|
1356 |
\@diff_LL, \@diff_LR , # out
|
|
|
1357 |
\@Errors); # in/out
|
|
|
1358 |
|
|
|
1359 |
my (@diff_RL, @diff_RR, );
|
|
|
1360 |
array_diff( $file_R , # in
|
|
|
1361 |
\@original_minus_blanks_R , # in
|
|
|
1362 |
\@lines_R , # in
|
|
|
1363 |
"comment" , # in
|
|
|
1364 |
\@diff_RL, \@diff_RR , # out
|
|
|
1365 |
\@Errors); # in/out
|
|
|
1366 |
# each line of each file is now classified as
|
|
|
1367 |
# code or comment
|
|
|
1368 |
|
|
|
1369 |
#use Data::Dumper;
|
|
|
1370 |
#print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
|
|
|
1371 |
#print Dumper("diff_RL", \@diff_RL, "diff_RR", \@diff_RR, );
|
|
|
1372 |
#die;
|
|
|
1373 |
# step 2: separate code from comments for L and R files
|
|
|
1374 |
my @code_L = ();
|
|
|
1375 |
my @code_R = ();
|
|
|
1376 |
my @comm_L = ();
|
|
|
1377 |
my @comm_R = ();
|
|
|
1378 |
foreach my $line_info (@diff_LL) {
|
|
|
1379 |
if ($line_info->{'type'} eq "code" ) {
|
|
|
1380 |
push @code_L, $line_info->{char};
|
|
|
1381 |
} elsif ($line_info->{'type'} eq "comment") {
|
|
|
1382 |
push @comm_L, $line_info->{char};
|
|
|
1383 |
} else {
|
|
|
1384 |
die "Diff unexpected line type ",
|
|
|
1385 |
$line_info->{'type'}, "for $file_L line ",
|
|
|
1386 |
$line_info->{'lnum'};
|
|
|
1387 |
}
|
|
|
1388 |
}
|
|
|
1389 |
foreach my $line_info (@diff_RL) {
|
|
|
1390 |
if ($line_info->{type} eq "code" ) {
|
|
|
1391 |
push @code_R, $line_info->{'char'};
|
|
|
1392 |
} elsif ($line_info->{type} eq "comment") {
|
|
|
1393 |
push @comm_R, $line_info->{'char'};
|
|
|
1394 |
} else {
|
|
|
1395 |
die "Diff unexpected line type ",
|
|
|
1396 |
$line_info->{'type'}, "for $file_R line ",
|
|
|
1397 |
$line_info->{'lnum'};
|
|
|
1398 |
}
|
|
|
1399 |
}
|
|
|
1400 |
|
|
|
1401 |
if ($opt_ignore_whitespace) {
|
|
|
1402 |
# strip all whitespace from each line of source code
|
|
|
1403 |
# and comments then use these stripped arrays in diffs
|
|
|
1404 |
foreach (@code_L) { s/\s+//g }
|
|
|
1405 |
foreach (@code_R) { s/\s+//g }
|
|
|
1406 |
foreach (@comm_L) { s/\s+//g }
|
|
|
1407 |
foreach (@comm_R) { s/\s+//g }
|
|
|
1408 |
}
|
|
|
1409 |
if ($opt_ignore_case) {
|
|
|
1410 |
# change all text to lowercase in diffs
|
|
|
1411 |
foreach (@code_L) { $_ = lc }
|
|
|
1412 |
foreach (@code_R) { $_ = lc }
|
|
|
1413 |
foreach (@comm_L) { $_ = lc }
|
|
|
1414 |
foreach (@comm_R) { $_ = lc }
|
|
|
1415 |
}
|
|
|
1416 |
# step 3: compute code diffs
|
|
|
1417 |
array_diff("$file_L v. $file_R" , # in
|
|
|
1418 |
\@code_L , # in
|
|
|
1419 |
\@code_R , # in
|
|
|
1420 |
"revision" , # in
|
|
|
1421 |
\@diff_LL, \@diff_LR , # out
|
|
|
1422 |
\@Errors); # in/out
|
|
|
1423 |
#print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
|
|
|
1424 |
#print Dumper("diff_LR", \@diff_LR);
|
|
|
1425 |
foreach my $line_info (@diff_LR) {
|
|
|
1426 |
my $status = $line_info->{'desc'}; # same|added|removed|modified
|
|
|
1427 |
++$Delta_by_Language{$Lang_L}{'code'}{$status};
|
|
|
1428 |
if ($opt_by_file) {
|
|
|
1429 |
++$Delta_by_File{$file_L}{'code'}{$status};
|
|
|
1430 |
}
|
|
|
1431 |
}
|
|
|
1432 |
#use Data::Dumper;
|
|
|
1433 |
#print Dumper("code diffs:", \@diff_LL, \@diff_LR);
|
|
|
1434 |
|
|
|
1435 |
# step 4: compute comment diffs
|
|
|
1436 |
array_diff("$file_L v. $file_R" , # in
|
|
|
1437 |
\@comm_L , # in
|
|
|
1438 |
\@comm_R , # in
|
|
|
1439 |
"revision" , # in
|
|
|
1440 |
\@diff_LL, \@diff_LR , # out
|
|
|
1441 |
\@Errors); # in/out
|
|
|
1442 |
#print Dumper("comment diff_LR", \@diff_LR);
|
|
|
1443 |
foreach my $line_info (@diff_LR) {
|
|
|
1444 |
my $status = $line_info->{'desc'}; # same|added|removed|modified
|
|
|
1445 |
++$Delta_by_Language{$Lang_L}{'comment'}{$status};
|
|
|
1446 |
if ($opt_by_file) {
|
|
|
1447 |
++$Delta_by_File{$file_L}{'comment'}{$status};
|
|
|
1448 |
}
|
|
|
1449 |
}
|
|
|
1450 |
#print Dumper("comment diffs:", \@diff_LL, \@diff_LR);
|
|
|
1451 |
#die; here= need to save original line number in diff result for html display
|
|
|
1452 |
|
|
|
1453 |
# step 5: compute difference in blank lines (kind of pointless)
|
|
|
1454 |
next if $Lang_L eq '(unknown)' or
|
|
|
1455 |
$Lang_R eq '(unknown)';
|
|
|
1456 |
my ($all_line_count_L,
|
|
|
1457 |
$blank_count_L ,
|
|
|
1458 |
$comment_count_L ,
|
|
|
1459 |
) = call_counter($file_L, $Lang_L, \@Errors);
|
|
|
1460 |
|
|
|
1461 |
my ($all_line_count_R,
|
|
|
1462 |
$blank_count_R ,
|
|
|
1463 |
$comment_count_R ,
|
|
|
1464 |
) = call_counter($file_R, $Lang_R, \@Errors);
|
|
|
1465 |
|
|
|
1466 |
if ($blank_count_L < $blank_count_R) {
|
|
|
1467 |
my $D = $blank_count_R - $blank_count_L;
|
|
|
1468 |
$Delta_by_Language{$Lang_L}{'blank'}{'added'} += $D;
|
|
|
1469 |
} else {
|
|
|
1470 |
my $D = $blank_count_L - $blank_count_R;
|
|
|
1471 |
$Delta_by_Language{$Lang_L}{'blank'}{'removed'} += $D;
|
|
|
1472 |
}
|
|
|
1473 |
if ($opt_by_file) {
|
|
|
1474 |
if ($blank_count_L < $blank_count_R) {
|
|
|
1475 |
my $D = $blank_count_R - $blank_count_L;
|
|
|
1476 |
$Delta_by_File{$file_L}{'blank'}{'added'} += $D;
|
|
|
1477 |
} else {
|
|
|
1478 |
my $D = $blank_count_L - $blank_count_R;
|
|
|
1479 |
$Delta_by_File{$file_L}{'blank'}{'removed'} += $D;
|
|
|
1480 |
}
|
|
|
1481 |
}
|
|
|
1482 |
|
|
|
1483 |
my $code_count_L = $all_line_count_L-$blank_count_L-$comment_count_L;
|
|
|
1484 |
if ($opt_by_file) {
|
|
|
1485 |
$Results_by_File{$file_L}{'code' } = $code_count_L ;
|
|
|
1486 |
$Results_by_File{$file_L}{'blank' } = $blank_count_L ;
|
|
|
1487 |
$Results_by_File{$file_L}{'comment'} = $comment_count_L ;
|
|
|
1488 |
$Results_by_File{$file_L}{'lang' } = $Lang_L ;
|
|
|
1489 |
$Results_by_File{$file_L}{'nFiles' } = 1 ;
|
|
|
1490 |
} else {
|
|
|
1491 |
$Results_by_File{$file_L} = 1; # just keep track of counted files
|
|
|
1492 |
}
|
|
|
1493 |
|
|
|
1494 |
$Results_by_Language{$Lang_L}{'nFiles'}++;
|
|
|
1495 |
$Results_by_Language{$Lang_L}{'code'} += $code_count_L ;
|
|
|
1496 |
$Results_by_Language{$Lang_L}{'blank'} += $blank_count_L ;
|
|
|
1497 |
$Results_by_Language{$Lang_L}{'comment'} += $comment_count_L;
|
|
|
1498 |
}
|
|
|
1499 |
if ($opt_diff_alignment) {
|
|
|
1500 |
$alignment[$alignment_pairs_index] =~ s/UPDATE_ME/$n_file_pairs_compared/;
|
|
|
1501 |
write_file($opt_diff_alignment, @alignment);
|
|
|
1502 |
}
|
|
|
1503 |
|
|
|
1504 |
}
|
|
|
1505 |
#use Data::Dumper;
|
|
|
1506 |
#print Dumper("Delta_by_Language:" , \%Delta_by_Language);
|
|
|
1507 |
#print Dumper("Results_by_Language:", \%Results_by_Language);
|
|
|
1508 |
#print Dumper("Delta_by_File:" , \%Delta_by_File);
|
|
|
1509 |
#print Dumper("Results_by_File:" , \%Results_by_File);
|
|
|
1510 |
#die;
|
|
|
1511 |
my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
|
|
|
1512 |
write_file($opt_ignored, @ignored_reasons ) if $opt_ignored;
|
|
|
1513 |
write_file($opt_counted, sort keys %Results_by_File) if $opt_counted;
|
|
|
1514 |
# 1}}}
|
|
|
1515 |
# Step 7: Assemble results. {{{1
|
|
|
1516 |
#
|
|
|
1517 |
my $end_time = get_time();
|
|
|
1518 |
printf "%8d file%s ignored. \n",
|
|
|
1519 |
plural_form(scalar keys %Ignored) unless $opt_quiet;
|
|
|
1520 |
print_errors(\%Error_Codes, \@Errors) if @Errors;
|
|
|
1521 |
if (!%Delta_by_Language) {
|
|
|
1522 |
print "Nothing to count.\n";
|
|
|
1523 |
exit;
|
|
|
1524 |
}
|
|
|
1525 |
|
|
|
1526 |
if ($opt_by_file) {
|
|
|
1527 |
@Lines_Out = diff_report($VERSION, get_time() - $start_time,
|
|
|
1528 |
"by file",
|
|
|
1529 |
\%Delta_by_File, \%Scale_Factor);
|
|
|
1530 |
} else {
|
|
|
1531 |
@Lines_Out = diff_report($VERSION, get_time() - $start_time,
|
|
|
1532 |
"by language",
|
|
|
1533 |
\%Delta_by_Language, \%Scale_Factor);
|
|
|
1534 |
}
|
|
|
1535 |
|
|
|
1536 |
# 1}}}
|
|
|
1537 |
} else {
|
|
|
1538 |
# Step 4: Separate code from non-code files. {{{1
|
|
|
1539 |
my $fh = 0;
|
|
|
1540 |
if ($opt_list_file) {
|
|
|
1541 |
my @list = read_list_file($opt_list_file);
|
|
|
1542 |
$fh = make_file_list(\@list, \%Error_Codes, \@Errors, \%Ignored);
|
|
|
1543 |
} else {
|
|
|
1544 |
$fh = make_file_list(\@ARGV, \%Error_Codes, \@Errors, \%Ignored);
|
|
|
1545 |
# make_file_list populates global variable @file_list via call to
|
|
|
1546 |
# File::Find's find() which in turn calls files()
|
|
|
1547 |
}
|
|
|
1548 |
if ($opt_exclude_list_file) {
|
|
|
1549 |
# note: process_exclude_list_file() references global @file_list
|
|
|
1550 |
process_exclude_list_file($opt_exclude_list_file,
|
|
|
1551 |
\%Exclude_Dir,
|
|
|
1552 |
\%Ignored);
|
|
|
1553 |
}
|
|
|
1554 |
if ($opt_skip_win_hidden and $ON_WINDOWS) {
|
|
|
1555 |
my @file_list_minus_hidded = ();
|
|
|
1556 |
# eval code to run on Unix without 'missing Win32::File module' error.
|
|
|
1557 |
my $win32_file_invocation = '
|
|
|
1558 |
use Win32::File;
|
|
|
1559 |
foreach my $F (@file_list) {
|
|
|
1560 |
my $attr = undef;
|
|
|
1561 |
Win32::File::GetAttributes($F, $attr);
|
|
|
1562 |
if ($attr & HIDDEN) {
|
|
|
1563 |
$Ignored{$F} = "Windows hidden file";
|
|
|
1564 |
print "Ignoring $F since it is a Windows hidden file\n"
|
|
|
1565 |
if $opt_v > 1;
|
|
|
1566 |
} else {
|
|
|
1567 |
push @file_list_minus_hidded, $F;
|
|
|
1568 |
}
|
|
|
1569 |
}';
|
|
|
1570 |
eval $win32_file_invocation;
|
|
|
1571 |
@file_list = @file_list_minus_hidded;
|
|
|
1572 |
}
|
|
|
1573 |
#printf "%8d file%s excluded. \n",
|
|
|
1574 |
# plural_form(scalar keys %Ignored)
|
|
|
1575 |
# unless $opt_quiet;
|
|
|
1576 |
# die print ": ", join("\n: ", @file_list), "\n";
|
|
|
1577 |
# 1}}}
|
|
|
1578 |
# Step 5: Remove duplicate files. {{{1
|
|
|
1579 |
#
|
|
|
1580 |
my %Language = ();
|
|
|
1581 |
my %unique_source_file = ();
|
|
|
1582 |
remove_duplicate_files($fh , # in
|
|
|
1583 |
\%Language , # out
|
|
|
1584 |
\%unique_source_file , # out
|
|
|
1585 |
\%Error_Codes , # in
|
|
|
1586 |
\@Errors , # out
|
|
|
1587 |
\%Ignored ); # out
|
|
|
1588 |
printf "%8d unique file%s. \n",
|
|
|
1589 |
plural_form(scalar keys %unique_source_file)
|
|
|
1590 |
unless $opt_quiet;
|
|
|
1591 |
# 1}}}
|
|
|
1592 |
# Step 6: Count code, comments, blank lines. {{{1
|
|
|
1593 |
#
|
|
|
1594 |
|
|
|
1595 |
my %Results_by_Language = ();
|
|
|
1596 |
my %Results_by_File = ();
|
|
|
1597 |
my $nCounted = 0;
|
|
|
1598 |
foreach my $file (sort keys %unique_source_file) {
|
|
|
1599 |
++$nCounted;
|
|
|
1600 |
printf "Counting: %d\r", $nCounted
|
|
|
1601 |
unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate));
|
|
|
1602 |
next if $Ignored{$file};
|
|
|
1603 |
if ($opt_include_lang and not $Include_Language{$Language{$file}}) {
|
|
|
1604 |
$Ignored{$file} = "--include-lang=$Language{$file}";
|
|
|
1605 |
next;
|
|
|
1606 |
}
|
|
|
1607 |
if ($Exclude_Language{$Language{$file}}) {
|
|
|
1608 |
$Ignored{$file} = "--exclude-lang=$Language{$file}";
|
|
|
1609 |
next;
|
|
|
1610 |
}
|
|
|
1611 |
my $Filters_by_Language_Language_file = ! @{$Filters_by_Language{$Language{$file}} };
|
|
|
1612 |
if ($Filters_by_Language_Language_file) {
|
|
|
1613 |
if ($Language{$file} eq "(unknown)") {
|
|
|
1614 |
$Ignored{$file} = "language unknown (#1)";
|
|
|
1615 |
} else {
|
|
|
1616 |
$Ignored{$file} = "missing Filters_by_Language{$Language{$file}}";
|
|
|
1617 |
}
|
|
|
1618 |
next;
|
|
|
1619 |
}
|
|
|
1620 |
|
|
|
1621 |
my ($all_line_count,
|
|
|
1622 |
$blank_count ,
|
|
|
1623 |
$comment_count ,
|
|
|
1624 |
) = call_counter($file, $Language{$file}, \@Errors);
|
|
|
1625 |
my $code_count = $all_line_count - $blank_count - $comment_count;
|
|
|
1626 |
if ($opt_by_file) {
|
|
|
1627 |
$Results_by_File{$file}{'code' } = $code_count ;
|
|
|
1628 |
$Results_by_File{$file}{'blank' } = $blank_count ;
|
|
|
1629 |
$Results_by_File{$file}{'comment'} = $comment_count ;
|
|
|
1630 |
$Results_by_File{$file}{'lang' } = $Language{$file};
|
|
|
1631 |
$Results_by_File{$file}{'nFiles' } = 1;
|
|
|
1632 |
} else {
|
|
|
1633 |
$Results_by_File{$file} = 1; # just keep track of counted files
|
|
|
1634 |
}
|
|
|
1635 |
|
|
|
1636 |
$Results_by_Language{$Language{$file}}{'nFiles'}++;
|
|
|
1637 |
$Results_by_Language{$Language{$file}}{'code'} += $code_count ;
|
|
|
1638 |
$Results_by_Language{$Language{$file}}{'blank'} += $blank_count ;
|
|
|
1639 |
$Results_by_Language{$Language{$file}}{'comment'} += $comment_count;
|
|
|
1640 |
}
|
|
|
1641 |
my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
|
|
|
1642 |
write_file($opt_ignored, @ignored_reasons ) if $opt_ignored;
|
|
|
1643 |
write_file($opt_counted, sort keys %Results_by_File) if $opt_counted;
|
|
|
1644 |
# 1}}}
|
|
|
1645 |
# Step 7: Assemble results. {{{1
|
|
|
1646 |
#
|
|
|
1647 |
my $end_time = get_time();
|
|
|
1648 |
printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored)
|
|
|
1649 |
unless $opt_quiet;
|
|
|
1650 |
print_errors(\%Error_Codes, \@Errors) if @Errors;
|
|
|
1651 |
exit unless %Results_by_Language;
|
|
|
1652 |
|
|
|
1653 |
generate_sql($end_time - $start_time,
|
|
|
1654 |
\%Results_by_File, \%Scale_Factor) if $opt_sql;
|
|
|
1655 |
|
|
|
1656 |
exit if $skip_generate_report;
|
|
|
1657 |
if ($opt_by_file_by_lang) {
|
|
|
1658 |
push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
|
|
|
1659 |
"by file",
|
|
|
1660 |
\%Results_by_File, \%Scale_Factor);
|
|
|
1661 |
push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
|
|
|
1662 |
"by language",
|
|
|
1663 |
\%Results_by_Language, \%Scale_Factor);
|
|
|
1664 |
} elsif ($opt_by_file) {
|
|
|
1665 |
push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
|
|
|
1666 |
"by file",
|
|
|
1667 |
\%Results_by_File, \%Scale_Factor);
|
|
|
1668 |
} else {
|
|
|
1669 |
push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
|
|
|
1670 |
"by language",
|
|
|
1671 |
\%Results_by_Language, \%Scale_Factor);
|
|
|
1672 |
}
|
|
|
1673 |
# 1}}}
|
|
|
1674 |
}
|
|
|
1675 |
if ($opt_report_file) { write_file($opt_report_file, @Lines_Out); }
|
|
|
1676 |
else { print "\n", join("\n", @Lines_Out), "\n"; }
|
|
|
1677 |
if ($opt_count_diff) {
|
|
|
1678 |
++$opt_count_diff;
|
|
|
1679 |
exit if $opt_count_diff > 3;
|
|
|
1680 |
goto Top_of_Processing_Loop;
|
|
|
1681 |
}
|
|
|
1682 |
|
|
|
1683 |
sub process_exclude_list_file { # {{{1
|
|
|
1684 |
my ($list_file , # in
|
|
|
1685 |
$rh_exclude_dir , # out
|
|
|
1686 |
$rh_ignored , # out
|
|
|
1687 |
) = @_;
|
|
|
1688 |
# note: references global @file_list
|
|
|
1689 |
print "-> process_exclude_list_file($list_file)\n" if $opt_v > 2;
|
|
|
1690 |
# reject a specific set of files and/or directories
|
|
|
1691 |
my @reject_list = read_list_file($list_file);
|
|
|
1692 |
my @file_reject_list = ();
|
|
|
1693 |
foreach my $F_or_D (@reject_list) {
|
|
|
1694 |
if (is_dir($F_or_D)) {
|
|
|
1695 |
$rh_exclude_dir->{$F_or_D} = 1;
|
|
|
1696 |
} elsif (is_file($F_or_D)) {
|
|
|
1697 |
push @file_reject_list, $F_or_D;
|
|
|
1698 |
}
|
|
|
1699 |
}
|
|
|
1700 |
|
|
|
1701 |
# Normalize file names for better comparison.
|
|
|
1702 |
my %normalized_input = normalize_file_names(@file_list);
|
|
|
1703 |
my %normalized_reject = normalize_file_names(@file_reject_list);
|
|
|
1704 |
my %normalized_exclude = normalize_file_names(keys %{$rh_exclude_dir});
|
|
|
1705 |
foreach my $F (keys %normalized_input) {
|
|
|
1706 |
if ($normalized_reject{$F} or is_excluded($F, \%normalized_exclude)) {
|
|
|
1707 |
my $orig_F = $normalized_input{$F};
|
|
|
1708 |
$rh_ignored->{$orig_F} = "listed in exclusion file $opt_exclude_list_file";
|
|
|
1709 |
print "Ignoring $orig_F because it appears in $opt_exclude_list_file\n"
|
|
|
1710 |
if $opt_v > 1;
|
|
|
1711 |
}
|
|
|
1712 |
}
|
|
|
1713 |
print "<- process_exclude_list_file\n" if $opt_v > 2;
|
|
|
1714 |
} # 1}}}
|
|
|
1715 |
sub combine_results { # {{{1
|
|
|
1716 |
# returns 1 if the inputs are categorized by language
|
|
|
1717 |
# 0 if no identifiable language was found
|
|
|
1718 |
my ($ra_report_files, # in
|
|
|
1719 |
$report_type , # in "by language" or "by report file"
|
|
|
1720 |
$rhh_count , # out count{TYPE}{nFiles|code|blank|comment|scaled}
|
|
|
1721 |
$rhaa_Filters_by_Language , # in
|
|
|
1722 |
) = @_;
|
|
|
1723 |
|
|
|
1724 |
print "-> combine_results(report_type=$report_type)\n" if $opt_v > 2;
|
|
|
1725 |
my $found_language = 0;
|
|
|
1726 |
|
|
|
1727 |
foreach my $file (@{$ra_report_files}) {
|
|
|
1728 |
my $IN = new IO::File $file, "r";
|
|
|
1729 |
if (!defined $IN) {
|
|
|
1730 |
warn "Unable to read $file; ignoring.\n";
|
|
|
1731 |
next;
|
|
|
1732 |
}
|
|
|
1733 |
while (<$IN>) {
|
|
|
1734 |
next if /^(http|Language|SUM|-----)/;
|
|
|
1735 |
if (!$opt_by_file and
|
|
|
1736 |
m{^(.*?)\s+ # language
|
|
|
1737 |
(\d+)\s+ # files
|
|
|
1738 |
(\d+)\s+ # blank
|
|
|
1739 |
(\d+)\s+ # comments
|
|
|
1740 |
(\d+)\s+ # code
|
|
|
1741 |
( # next four entries missing with -nno3
|
|
|
1742 |
x\s+ # x
|
|
|
1743 |
\d+\.\d+\s+ # scale
|
|
|
1744 |
=\s+ # =
|
|
|
1745 |
(\d+\.\d+)\s* # scaled code
|
|
|
1746 |
)?
|
|
|
1747 |
$}x) {
|
|
|
1748 |
if ($report_type eq "by language") {
|
|
|
1749 |
next unless @{$rhaa_Filters_by_Language->{$1}};
|
|
|
1750 |
# above test necessary to avoid trying to sum reports
|
|
|
1751 |
# of reports (which have no language breakdown).
|
|
|
1752 |
$found_language = 1;
|
|
|
1753 |
$rhh_count->{$1 }{'nFiles' } += $2;
|
|
|
1754 |
$rhh_count->{$1 }{'blank' } += $3;
|
|
|
1755 |
$rhh_count->{$1 }{'comment'} += $4;
|
|
|
1756 |
$rhh_count->{$1 }{'code' } += $5;
|
|
|
1757 |
$rhh_count->{$1 }{'scaled' } += $7 if $opt_3;
|
|
|
1758 |
} else {
|
|
|
1759 |
$rhh_count->{$file}{'nFiles' } += $2;
|
|
|
1760 |
$rhh_count->{$file}{'blank' } += $3;
|
|
|
1761 |
$rhh_count->{$file}{'comment'} += $4;
|
|
|
1762 |
$rhh_count->{$file}{'code' } += $5;
|
|
|
1763 |
$rhh_count->{$file}{'scaled' } += $7 if $opt_3;
|
|
|
1764 |
}
|
|
|
1765 |
} elsif ($opt_by_file and
|
|
|
1766 |
m{^(.*?)\s+ # language
|
|
|
1767 |
(\d+)\s+ # blank
|
|
|
1768 |
(\d+)\s+ # comments
|
|
|
1769 |
(\d+)\s+ # code
|
|
|
1770 |
( # next four entries missing with -nno3
|
|
|
1771 |
x\s+ # x
|
|
|
1772 |
\d+\.\d+\s+ # scale
|
|
|
1773 |
=\s+ # =
|
|
|
1774 |
(\d+\.\d+)\s* # scaled code
|
|
|
1775 |
)?
|
|
|
1776 |
$}x) {
|
|
|
1777 |
if ($report_type eq "by language") {
|
|
|
1778 |
next unless %{$rhaa_Filters_by_Language->{$1}};
|
|
|
1779 |
# above test necessary to avoid trying to sum reports
|
|
|
1780 |
# of reports (which have no language breakdown).
|
|
|
1781 |
$found_language = 1;
|
|
|
1782 |
$rhh_count->{$1 }{'nFiles' } += 1;
|
|
|
1783 |
$rhh_count->{$1 }{'blank' } += $2;
|
|
|
1784 |
$rhh_count->{$1 }{'comment'} += $3;
|
|
|
1785 |
$rhh_count->{$1 }{'code' } += $4;
|
|
|
1786 |
$rhh_count->{$1 }{'scaled' } += $6 if $opt_3;
|
|
|
1787 |
} else {
|
|
|
1788 |
$rhh_count->{$file}{'nFiles' } += 1;
|
|
|
1789 |
$rhh_count->{$file}{'blank' } += $2;
|
|
|
1790 |
$rhh_count->{$file}{'comment'} += $3;
|
|
|
1791 |
$rhh_count->{$file}{'code' } += $4;
|
|
|
1792 |
$rhh_count->{$file}{'scaled' } += $6 if $opt_3;
|
|
|
1793 |
}
|
|
|
1794 |
}
|
|
|
1795 |
}
|
|
|
1796 |
}
|
|
|
1797 |
print "<- combine_results\n" if $opt_v > 2;
|
|
|
1798 |
return $found_language;
|
|
|
1799 |
} # 1}}}
|
|
|
1800 |
sub compute_denominator { # {{{1
|
|
|
1801 |
my ($method, $nCode, $nComment, $nBlank, ) = @_;
|
|
|
1802 |
print "-> compute_denominator\n" if $opt_v > 2;
|
|
|
1803 |
my %den = ( "c" => $nCode );
|
|
|
1804 |
$den{"cm"} = $den{"c"} + $nComment;
|
|
|
1805 |
$den{"cmb"} = $den{"cm"} + $nBlank;
|
|
|
1806 |
$den{"cb"} = $den{"c"} + $nBlank;
|
|
|
1807 |
|
|
|
1808 |
print "<- compute_denominator\n" if $opt_v > 2;
|
|
|
1809 |
return $den{ $method };
|
|
|
1810 |
} # 1}}}
|
|
|
1811 |
sub diff_report { # {{{1
|
|
|
1812 |
# returns an array of lines containing the results
|
|
|
1813 |
print "-> diff_report\n" if $opt_v > 2;
|
|
|
1814 |
|
|
|
1815 |
if ($opt_xml or $opt_yaml) {
|
|
|
1816 |
print "<- diff_report\n" if $opt_v > 2;
|
|
|
1817 |
return diff_xml_yaml_report(@_)
|
|
|
1818 |
} elsif ($opt_csv) {
|
|
|
1819 |
print "<- diff_report\n" if $opt_v > 2;
|
|
|
1820 |
return diff_csv_report(@_)
|
|
|
1821 |
}
|
|
|
1822 |
|
|
|
1823 |
my ($version , # in
|
|
|
1824 |
$elapsed_sec, # in
|
|
|
1825 |
$report_type, # in "by language" | "by report file" | "by file"
|
|
|
1826 |
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
|
|
|
1827 |
$rh_scale , # in
|
|
|
1828 |
) = @_;
|
|
|
1829 |
|
|
|
1830 |
#use Data::Dumper;
|
|
|
1831 |
#print "diff_report: ", Dumper($rhhh_count), "\n";
|
|
|
1832 |
my @results = ();
|
|
|
1833 |
|
|
|
1834 |
my $languages = ();
|
|
|
1835 |
my %sum = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed}
|
|
|
1836 |
my $max_len = 0;
|
|
|
1837 |
foreach my $language (keys %{$rhhh_count}) {
|
|
|
1838 |
foreach my $V (qw(nFiles blank comment code)) {
|
|
|
1839 |
foreach my $S (qw(added same modified removed)) {
|
|
|
1840 |
$rhhh_count->{$language}{$V}{$S} = 0 unless
|
|
|
1841 |
defined $rhhh_count->{$language}{$V}{$S};
|
|
|
1842 |
$sum{$V}{$S} += $rhhh_count->{$language}{$V}{$S};
|
|
|
1843 |
}
|
|
|
1844 |
}
|
|
|
1845 |
$max_len = length($language) if length($language) > $max_len;
|
|
|
1846 |
}
|
|
|
1847 |
my $column_1_offset = 0;
|
|
|
1848 |
$column_1_offset = $max_len - 17 if $max_len > 17;
|
|
|
1849 |
$elapsed_sec = 0.5 unless $elapsed_sec;
|
|
|
1850 |
|
|
|
1851 |
my $spacing_0 = 23;
|
|
|
1852 |
my $spacing_1 = 13;
|
|
|
1853 |
my $spacing_2 = 9;
|
|
|
1854 |
my $spacing_3 = 17;
|
|
|
1855 |
if (!$opt_3) {
|
|
|
1856 |
$spacing_1 = 19;
|
|
|
1857 |
$spacing_2 = 14;
|
|
|
1858 |
$spacing_3 = 28;
|
|
|
1859 |
}
|
|
|
1860 |
$spacing_0 += $column_1_offset;
|
|
|
1861 |
$spacing_1 += $column_1_offset;
|
|
|
1862 |
$spacing_3 += $column_1_offset;
|
|
|
1863 |
my %Format = (
|
|
|
1864 |
'1' => { 'xml' => 'name="%s" ',
|
|
|
1865 |
'txt' => "\%-${spacing_0}s ",
|
|
|
1866 |
},
|
|
|
1867 |
'2' => { 'xml' => 'name="%s" ',
|
|
|
1868 |
'txt' => "\%-${spacing_3}s ",
|
|
|
1869 |
},
|
|
|
1870 |
'3' => { 'xml' => 'files_count="%d" ',
|
|
|
1871 |
'txt' => '%5d ',
|
|
|
1872 |
},
|
|
|
1873 |
'4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
|
|
|
1874 |
'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
|
|
|
1875 |
},
|
|
|
1876 |
'5' => { 'xml' => 'blank="%.2f" comment="%.2f" code="%d" ',
|
|
|
1877 |
'txt' => "\%3.2f \%3.2f \%${spacing_2}d",
|
|
|
1878 |
},
|
|
|
1879 |
'6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
|
|
|
1880 |
'txt' => ' x %6.2f = %14.2f',
|
|
|
1881 |
},
|
|
|
1882 |
);
|
|
|
1883 |
my $Style = "txt";
|
|
|
1884 |
$Style = "xml" if $opt_xml ;
|
|
|
1885 |
$Style = "xml" if $opt_yaml; # not a typo; just set to anything but txt
|
|
|
1886 |
$Style = "xml" if $opt_csv ; # not a typo; just set to anything but txt
|
|
|
1887 |
|
|
|
1888 |
my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
|
|
|
1889 |
$hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
|
|
|
1890 |
if (!$opt_3) and (68 + $column_1_offset) > 79;
|
|
|
1891 |
my $data_line = "";
|
|
|
1892 |
my $first_column;
|
|
|
1893 |
my $BY_LANGUAGE = 0;
|
|
|
1894 |
my $BY_FILE = 0;
|
|
|
1895 |
if ($report_type eq "by language") {
|
|
|
1896 |
$first_column = "Language";
|
|
|
1897 |
$BY_LANGUAGE = 1;
|
|
|
1898 |
} elsif ($report_type eq "by file") {
|
|
|
1899 |
$first_column = "File";
|
|
|
1900 |
$BY_FILE = 1;
|
|
|
1901 |
} else {
|
|
|
1902 |
$first_column = "Report File";
|
|
|
1903 |
}
|
|
|
1904 |
|
|
|
1905 |
my $header_line = sprintf "%s v %s", $URL, $version;
|
|
|
1906 |
my $sum_files = 1;
|
|
|
1907 |
my $sum_lines = 1;
|
|
|
1908 |
$header_line .= sprintf(" T=%.2f s (%.1f files/s, %.1f lines/s)",
|
|
|
1909 |
$elapsed_sec ,
|
|
|
1910 |
$sum_files/$elapsed_sec,
|
|
|
1911 |
$sum_lines/$elapsed_sec) unless $opt_sum_reports;
|
|
|
1912 |
if ($Style eq "txt") {
|
|
|
1913 |
push @results, output_header($header_line, $hyphen_line, $BY_FILE);
|
|
|
1914 |
} elsif ($Style eq "csv") {
|
|
|
1915 |
die "csv";
|
|
|
1916 |
}
|
|
|
1917 |
|
|
|
1918 |
# column headers
|
|
|
1919 |
if (!$opt_3 and $BY_FILE) {
|
|
|
1920 |
my $spacing_n = $spacing_1 - 11;
|
|
|
1921 |
$data_line = sprintf "%-${spacing_n}s" , $first_column;
|
|
|
1922 |
} else {
|
|
|
1923 |
$data_line = sprintf "%-${spacing_1}s ", $first_column;
|
|
|
1924 |
}
|
|
|
1925 |
if ($BY_FILE) {
|
|
|
1926 |
$data_line .= sprintf "%${spacing_2}s" , "" ;
|
|
|
1927 |
} else {
|
|
|
1928 |
$data_line .= sprintf "%${spacing_2}s " , "files";
|
|
|
1929 |
}
|
|
|
1930 |
my $PCT_symbol = "";
|
|
|
1931 |
$PCT_symbol = " \%" if $opt_by_percent;
|
|
|
1932 |
$data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
|
|
|
1933 |
"blank${PCT_symbol}" ,
|
|
|
1934 |
"comment${PCT_symbol}" ,
|
|
|
1935 |
"code";
|
|
|
1936 |
|
|
|
1937 |
if ($Style eq "txt") {
|
|
|
1938 |
push @results, $data_line;
|
|
|
1939 |
push @results, $hyphen_line;
|
|
|
1940 |
}
|
|
|
1941 |
|
|
|
1942 |
####foreach my $lang_or_file (keys %{$rhhh_count}) {
|
|
|
1943 |
#### $rhhh_count->{$lang_or_file}{'code'} = 0 unless
|
|
|
1944 |
#### defined $rhhh_count->{$lang_or_file}{'code'};
|
|
|
1945 |
####}
|
|
|
1946 |
foreach my $lang_or_file (sort {
|
|
|
1947 |
$rhhh_count->{$b}{'code'} <=>
|
|
|
1948 |
$rhhh_count->{$a}{'code'}
|
|
|
1949 |
}
|
|
|
1950 |
keys %{$rhhh_count}) {
|
|
|
1951 |
|
|
|
1952 |
if ($BY_FILE) {
|
|
|
1953 |
push @results, rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
|
|
|
1954 |
} else {
|
|
|
1955 |
push @results, $lang_or_file;
|
|
|
1956 |
}
|
|
|
1957 |
foreach my $S (qw(same modified added removed)) {
|
|
|
1958 |
my $indent = $spacing_1 - 2;
|
|
|
1959 |
my $line .= sprintf " %-${indent}s", $S;
|
|
|
1960 |
if ($BY_FILE) {
|
|
|
1961 |
$line .= sprintf " ";
|
|
|
1962 |
} else {
|
|
|
1963 |
$line .= sprintf " %${spacing_2}s", $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
|
|
|
1964 |
}
|
|
|
1965 |
if ($opt_by_percent) {
|
|
|
1966 |
my $DEN = compute_denominator($opt_by_percent ,
|
|
|
1967 |
$rhhh_count->{$lang_or_file}{'code'}{$S} ,
|
|
|
1968 |
$rhhh_count->{$lang_or_file}{'comment'}{$S},
|
|
|
1969 |
$rhhh_count->{$lang_or_file}{'blank'}{$S} );
|
|
|
1970 |
if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
|
|
|
1971 |
$line .= sprintf " %14.2f %14.2f %${spacing_2}s",
|
|
|
1972 |
$rhhh_count->{$lang_or_file}{'blank'}{$S} / $DEN * 100,
|
|
|
1973 |
$rhhh_count->{$lang_or_file}{'comment'}{$S} / $DEN * 100,
|
|
|
1974 |
$rhhh_count->{$lang_or_file}{'code'}{$S} ;
|
|
|
1975 |
} else {
|
|
|
1976 |
$line .= sprintf " %14.2f %14.2f %${spacing_2}s",
|
|
|
1977 |
0.0, 0.0, $rhhh_count->{$lang_or_file}{'code'}{$S} ;
|
|
|
1978 |
}
|
|
|
1979 |
} else {
|
|
|
1980 |
$line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
|
|
|
1981 |
$rhhh_count->{$lang_or_file}{'blank'}{$S} ,
|
|
|
1982 |
$rhhh_count->{$lang_or_file}{'comment'}{$S} ,
|
|
|
1983 |
$rhhh_count->{$lang_or_file}{'code'}{$S} ;
|
|
|
1984 |
}
|
|
|
1985 |
push @results, $line;
|
|
|
1986 |
}
|
|
|
1987 |
}
|
|
|
1988 |
push @results, "-" x 79;
|
|
|
1989 |
push @results, "SUM:";
|
|
|
1990 |
foreach my $S (qw(same modified added removed)) {
|
|
|
1991 |
my $indent = $spacing_1 - 2;
|
|
|
1992 |
my $line .= sprintf " %-${indent}s", $S;
|
|
|
1993 |
if ($BY_FILE) {
|
|
|
1994 |
$line .= sprintf " ";
|
|
|
1995 |
} else {
|
|
|
1996 |
$line .= sprintf " %${spacing_2}s", $sum{'nFiles'}{$S};
|
|
|
1997 |
}
|
|
|
1998 |
if ($opt_by_percent) {
|
|
|
1999 |
my $DEN = compute_denominator($opt_by_percent,
|
|
|
2000 |
$sum{'code'}{$S}, $sum{'comment'}{$S}, $sum{'blank'}{$S});
|
|
|
2001 |
if ($sum{'code'}{$S} > 0) {
|
|
|
2002 |
$line .= sprintf " %14.2f %14.2f %${spacing_2}s",
|
|
|
2003 |
$sum{'blank'}{$S} / $DEN * 100,
|
|
|
2004 |
$sum{'comment'}{$S} / $DEN * 100,
|
|
|
2005 |
$sum{'code'}{$S} ;
|
|
|
2006 |
} else {
|
|
|
2007 |
$line .= sprintf " %14.2f %14.2f %${spacing_2}s",
|
|
|
2008 |
0.0, 0.0, $sum{'code'}{$S} ;
|
|
|
2009 |
}
|
|
|
2010 |
} else {
|
|
|
2011 |
$line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
|
|
|
2012 |
$sum{'blank'}{$S} ,
|
|
|
2013 |
$sum{'comment'}{$S} ,
|
|
|
2014 |
$sum{'code'}{$S} ;
|
|
|
2015 |
}
|
|
|
2016 |
push @results, $line;
|
|
|
2017 |
}
|
|
|
2018 |
push @results, "-" x 79;
|
|
|
2019 |
write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
|
|
|
2020 |
print "<- diff_report\n" if $opt_v > 2;
|
|
|
2021 |
|
|
|
2022 |
return @results;
|
|
|
2023 |
} # 1}}}
|
|
|
2024 |
sub xml_or_yaml_header { # {{{1
|
|
|
2025 |
my ($URL, $version, $elapsed_sec, $sum_files, $sum_lines, $by_file) = @_;
|
|
|
2026 |
print "-> xml_or_yaml_header\n" if $opt_v > 2;
|
|
|
2027 |
my $header = "";
|
|
|
2028 |
my $file_rate = $sum_files/$elapsed_sec;
|
|
|
2029 |
my $line_rate = $sum_lines/$elapsed_sec;
|
|
|
2030 |
my $type = "";
|
|
|
2031 |
$type = "diff_" if $opt_diff;
|
|
|
2032 |
my $report_file = "";
|
|
|
2033 |
if ($opt_report_file) {
|
|
|
2034 |
if ($opt_sum_reports) {
|
|
|
2035 |
if ($by_file) {
|
|
|
2036 |
$report_file = " <report_file>$opt_report_file.file</report_file>"
|
|
|
2037 |
} else {
|
|
|
2038 |
$report_file = " <report_file>$opt_report_file.lang</report_file>"
|
|
|
2039 |
}
|
|
|
2040 |
} else {
|
|
|
2041 |
$report_file = " <report_file>$opt_report_file</report_file>"
|
|
|
2042 |
}
|
|
|
2043 |
}
|
|
|
2044 |
if ($opt_xml) {
|
|
|
2045 |
$header = "<?xml version=\"1.0\"?>";
|
|
|
2046 |
$header .= "\n<?xml-stylesheet type=\"text/xsl\" href=\"" . $opt_xsl . "\"?>" if $opt_xsl;
|
|
|
2047 |
$header .= "<${type}results>
|
|
|
2048 |
<header>
|
|
|
2049 |
<cloc_url>$URL</cloc_url>
|
|
|
2050 |
<cloc_version>$version</cloc_version>
|
|
|
2051 |
<elapsed_seconds>$elapsed_sec</elapsed_seconds>
|
|
|
2052 |
<n_files>$sum_files</n_files>
|
|
|
2053 |
<n_lines>$sum_lines</n_lines>
|
|
|
2054 |
<files_per_second>$file_rate</files_per_second>
|
|
|
2055 |
<lines_per_second>$line_rate</lines_per_second>";
|
|
|
2056 |
$header .= "\n$report_file"
|
|
|
2057 |
if $opt_report_file;
|
|
|
2058 |
$header .= "\n</header>";
|
|
|
2059 |
} elsif ($opt_yaml) {
|
|
|
2060 |
$header = "---\n# $URL
|
|
|
2061 |
header :
|
|
|
2062 |
cloc_url : http://cloc.sourceforge.net
|
|
|
2063 |
cloc_version : $version
|
|
|
2064 |
elapsed_seconds : $elapsed_sec
|
|
|
2065 |
n_files : $sum_files
|
|
|
2066 |
n_lines : $sum_lines
|
|
|
2067 |
files_per_second : $file_rate
|
|
|
2068 |
lines_per_second : $line_rate";
|
|
|
2069 |
if ($opt_report_file) {
|
|
|
2070 |
if ($opt_sum_reports) {
|
|
|
2071 |
if ($by_file) {
|
|
|
2072 |
$header .= "\n report_file : $opt_report_file.file"
|
|
|
2073 |
} else {
|
|
|
2074 |
$header .= "\n report_file : $opt_report_file.lang"
|
|
|
2075 |
}
|
|
|
2076 |
} else {
|
|
|
2077 |
$header .= "\n report_file : $opt_report_file";
|
|
|
2078 |
}
|
|
|
2079 |
}
|
|
|
2080 |
}
|
|
|
2081 |
print "<- xml_or_yaml_header\n" if $opt_v > 2;
|
|
|
2082 |
return $header;
|
|
|
2083 |
} # 1}}}
|
|
|
2084 |
sub diff_xml_yaml_report { # {{{1
|
|
|
2085 |
# returns an array of lines containing the results
|
|
|
2086 |
my ($version , # in
|
|
|
2087 |
$elapsed_sec, # in
|
|
|
2088 |
$report_type, # in "by language" | "by report file" | "by file"
|
|
|
2089 |
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
|
|
|
2090 |
$rh_scale , # in
|
|
|
2091 |
) = @_;
|
|
|
2092 |
print "-> diff_xml_yaml_report\n" if $opt_v > 2;
|
|
|
2093 |
|
|
|
2094 |
#print "diff_report: ", Dumper($rhhh_count), "\n";
|
|
|
2095 |
my @results = ();
|
|
|
2096 |
|
|
|
2097 |
my $languages = ();
|
|
|
2098 |
my %sum = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed}
|
|
|
2099 |
|
|
|
2100 |
my $sum_files = 0;
|
|
|
2101 |
my $sum_lines = 0;
|
|
|
2102 |
foreach my $language (keys %{$rhhh_count}) {
|
|
|
2103 |
foreach my $V (qw(nFiles blank comment code)) {
|
|
|
2104 |
foreach my $S (qw(added same modified removed)) {
|
|
|
2105 |
$rhhh_count->{$language}{$V}{$S} = 0 unless
|
|
|
2106 |
defined $rhhh_count->{$language}{$V}{$S};
|
|
|
2107 |
$sum{$V}{$S} += $rhhh_count->{$language}{$V}{$S};
|
|
|
2108 |
if ($V eq "nFiles") {
|
|
|
2109 |
$sum_files += $rhhh_count->{$language}{$V}{$S};
|
|
|
2110 |
} else {
|
|
|
2111 |
$sum_lines += $rhhh_count->{$language}{$V}{$S};
|
|
|
2112 |
}
|
|
|
2113 |
}
|
|
|
2114 |
}
|
|
|
2115 |
}
|
|
|
2116 |
$elapsed_sec = 0.5 unless $elapsed_sec;
|
|
|
2117 |
|
|
|
2118 |
my $data_line = "";
|
|
|
2119 |
my $BY_LANGUAGE = 0;
|
|
|
2120 |
my $BY_FILE = 0;
|
|
|
2121 |
if ($report_type eq "by language") {
|
|
|
2122 |
$BY_LANGUAGE = 1;
|
|
|
2123 |
} elsif ($report_type eq "by file") {
|
|
|
2124 |
$BY_FILE = 1;
|
|
|
2125 |
}
|
|
|
2126 |
|
|
|
2127 |
if (!$ALREADY_SHOWED_HEADER) {
|
|
|
2128 |
push @results,
|
|
|
2129 |
xml_or_yaml_header($URL, $version, $elapsed_sec,
|
|
|
2130 |
$sum_files, $sum_lines, $BY_FILE);
|
|
|
2131 |
$ALREADY_SHOWED_HEADER = 1;
|
|
|
2132 |
}
|
|
|
2133 |
|
|
|
2134 |
foreach my $S (qw(same modified added removed)) {
|
|
|
2135 |
if ($opt_xml) {
|
|
|
2136 |
push @results, " <$S>";
|
|
|
2137 |
} elsif ($opt_yaml) {
|
|
|
2138 |
push @results, "$S :";
|
|
|
2139 |
}
|
|
|
2140 |
########foreach my $lang_or_file (keys %{$rhhh_count}) {
|
|
|
2141 |
######## $rhhh_count->{$lang_or_file}{'code'} = 0 unless
|
|
|
2142 |
######## defined $rhhh_count->{$lang_or_file}{'code'};
|
|
|
2143 |
########}
|
|
|
2144 |
foreach my $lang_or_file (sort {
|
|
|
2145 |
$rhhh_count->{$b}{'code'} <=>
|
|
|
2146 |
$rhhh_count->{$a}{'code'}
|
|
|
2147 |
}
|
|
|
2148 |
keys %{$rhhh_count}) {
|
|
|
2149 |
my $L = "";
|
|
|
2150 |
if ($opt_xml) {
|
|
|
2151 |
if ($BY_FILE) {
|
|
|
2152 |
$L .= sprintf " <file name=\"%s\" files_count=\"1\" ",
|
|
|
2153 |
xml_metachars(
|
|
|
2154 |
rm_leading_tempdir($lang_or_file, \%TEMP_DIR));
|
|
|
2155 |
} else {
|
|
|
2156 |
$L .= sprintf " <language name=\"%s\" files_count=\"%d\" ",
|
|
|
2157 |
$lang_or_file ,
|
|
|
2158 |
$rhhh_count->{$lang_or_file}{'nFiles'}{$S};
|
|
|
2159 |
}
|
|
|
2160 |
if ($opt_by_percent) {
|
|
|
2161 |
my $DEN = compute_denominator($opt_by_percent ,
|
|
|
2162 |
$rhhh_count->{$lang_or_file}{'code'}{$S} ,
|
|
|
2163 |
$rhhh_count->{$lang_or_file}{'comment'}{$S},
|
|
|
2164 |
$rhhh_count->{$lang_or_file}{'blank'}{$S} );
|
|
|
2165 |
foreach my $T (qw(blank comment)) {
|
|
|
2166 |
if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
|
|
|
2167 |
$L .= sprintf "%s=\"%.2f\" ",
|
|
|
2168 |
$T, $rhhh_count->{$lang_or_file}{$T}{$S} / $DEN * 100;
|
|
|
2169 |
} else {
|
|
|
2170 |
$L .= sprintf "%s=\"0.0\" ", $T;
|
|
|
2171 |
}
|
|
|
2172 |
}
|
|
|
2173 |
foreach my $T (qw(code)) {
|
|
|
2174 |
$L .= sprintf "%s=\"%d\" ",
|
|
|
2175 |
$T, $rhhh_count->{$lang_or_file}{$T}{$S};
|
|
|
2176 |
}
|
|
|
2177 |
} else {
|
|
|
2178 |
foreach my $T (qw(blank comment code)) {
|
|
|
2179 |
$L .= sprintf "%s=\"%d\" ",
|
|
|
2180 |
$T, $rhhh_count->{$lang_or_file}{$T}{$S};
|
|
|
2181 |
}
|
|
|
2182 |
}
|
|
|
2183 |
push @results, $L . "/>";
|
|
|
2184 |
} elsif ($opt_yaml) {
|
|
|
2185 |
if ($BY_FILE) {
|
|
|
2186 |
push @results, sprintf " - file : %s",
|
|
|
2187 |
rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
|
|
|
2188 |
push @results, sprintf " files_count : 1",
|
|
|
2189 |
} else {
|
|
|
2190 |
push @results, sprintf " - language : %s", $lang_or_file;
|
|
|
2191 |
push @results, sprintf " files_count : %d",
|
|
|
2192 |
$rhhh_count->{$lang_or_file}{'nFiles'}{$S};
|
|
|
2193 |
}
|
|
|
2194 |
if ($opt_by_percent) {
|
|
|
2195 |
my $DEN = compute_denominator($opt_by_percent ,
|
|
|
2196 |
$rhhh_count->{$lang_or_file}{'code'}{$S} ,
|
|
|
2197 |
$rhhh_count->{$lang_or_file}{'comment'}{$S},
|
|
|
2198 |
$rhhh_count->{$lang_or_file}{'blank'}{$S} );
|
|
|
2199 |
foreach my $T (qw(blank comment)) {
|
|
|
2200 |
if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
|
|
|
2201 |
push @results, sprintf " %s : %.2f",
|
|
|
2202 |
$T, $rhhh_count->{$lang_or_file}{$T}{$S} / $DEN * 100;
|
|
|
2203 |
} else {
|
|
|
2204 |
push @results, sprintf " %s : 0.0", $T;
|
|
|
2205 |
}
|
|
|
2206 |
}
|
|
|
2207 |
foreach my $T (qw(code)) {
|
|
|
2208 |
push @results, sprintf " %s : %d",
|
|
|
2209 |
$T, $rhhh_count->{$lang_or_file}{$T}{$S};
|
|
|
2210 |
}
|
|
|
2211 |
} else {
|
|
|
2212 |
foreach my $T (qw(blank comment code)) {
|
|
|
2213 |
push @results, sprintf " %s : %d",
|
|
|
2214 |
$T, $rhhh_count->{$lang_or_file}{$T}{$S};
|
|
|
2215 |
}
|
|
|
2216 |
}
|
|
|
2217 |
}
|
|
|
2218 |
}
|
|
|
2219 |
|
|
|
2220 |
if ($opt_xml) {
|
|
|
2221 |
my $L = sprintf " <total sum_files=\"%d\" ", $sum{'nFiles'}{$S};
|
|
|
2222 |
if ($opt_by_percent) {
|
|
|
2223 |
my $DEN = compute_denominator($opt_by_percent,
|
|
|
2224 |
$sum{'code'}{$S} ,
|
|
|
2225 |
$sum{'comment'}{$S},
|
|
|
2226 |
$sum{'blank'}{$S} );
|
|
|
2227 |
foreach my $V (qw(blank comment)) {
|
|
|
2228 |
if ($sum{'code'}{$S} > 0) {
|
|
|
2229 |
$L .= sprintf "%s=\"%.2f\" ", $V, $sum{$V}{$S} / $DEN * 100;
|
|
|
2230 |
} else {
|
|
|
2231 |
$L .= sprintf "%s=\"0.0\" ", $V;
|
|
|
2232 |
}
|
|
|
2233 |
}
|
|
|
2234 |
foreach my $V (qw(code)) {
|
|
|
2235 |
$L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
|
|
|
2236 |
}
|
|
|
2237 |
} else {
|
|
|
2238 |
foreach my $V (qw(blank comment code)) {
|
|
|
2239 |
$L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
|
|
|
2240 |
}
|
|
|
2241 |
}
|
|
|
2242 |
push @results, $L . "/>";
|
|
|
2243 |
push @results, " </$S>";
|
|
|
2244 |
} elsif ($opt_yaml) {
|
|
|
2245 |
push @results, sprintf "%s_total :\n sum_files : %d",
|
|
|
2246 |
$S, $sum{'nFiles'}{$S};
|
|
|
2247 |
if ($opt_by_percent) {
|
|
|
2248 |
my $DEN = compute_denominator($opt_by_percent ,
|
|
|
2249 |
$sum{'code'}{$S} ,
|
|
|
2250 |
$sum{'comment'}{$S},
|
|
|
2251 |
$sum{'blank'}{$S} );
|
|
|
2252 |
foreach my $V (qw(blank comment)) {
|
|
|
2253 |
if ($sum{'code'}{$S} > 0) {
|
|
|
2254 |
push @results, sprintf " %s : %.2f", $V, $sum{$V}{$S} / $DEN * 100;
|
|
|
2255 |
} else {
|
|
|
2256 |
push @results, sprintf " %s : 0.0", $V;
|
|
|
2257 |
}
|
|
|
2258 |
}
|
|
|
2259 |
foreach my $V (qw(code)) {
|
|
|
2260 |
push @results, sprintf " %s : %d", $V, $sum{$V}{$S};
|
|
|
2261 |
}
|
|
|
2262 |
} else {
|
|
|
2263 |
foreach my $V (qw(blank comment code)) {
|
|
|
2264 |
push @results, sprintf " %s : %d", $V, $sum{$V}{$S};
|
|
|
2265 |
}
|
|
|
2266 |
}
|
|
|
2267 |
}
|
|
|
2268 |
}
|
|
|
2269 |
|
|
|
2270 |
if ($opt_xml) {
|
|
|
2271 |
push @results, "</diff_results>";
|
|
|
2272 |
}
|
|
|
2273 |
write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
|
|
|
2274 |
print "<- diff_xml_yaml_report\n" if $opt_v > 2;
|
|
|
2275 |
return @results;
|
|
|
2276 |
} # 1}}}
|
|
|
2277 |
sub diff_csv_report { # {{{1
|
|
|
2278 |
# returns an array of lines containing the results
|
|
|
2279 |
my ($version , # in
|
|
|
2280 |
$elapsed_sec, # in
|
|
|
2281 |
$report_type, # in "by language" | "by report file" | "by file"
|
|
|
2282 |
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
|
|
|
2283 |
$rh_scale , # in unused
|
|
|
2284 |
) = @_;
|
|
|
2285 |
print "-> diff_csv_report\n" if $opt_v > 2;
|
|
|
2286 |
|
|
|
2287 |
#use Data::Dumper;
|
|
|
2288 |
#print "diff_csv_report: ", Dumper($rhhh_count), "\n";
|
|
|
2289 |
#die;
|
|
|
2290 |
my @results = ();
|
|
|
2291 |
my $languages = ();
|
|
|
2292 |
|
|
|
2293 |
my $data_line = "";
|
|
|
2294 |
my $BY_LANGUAGE = 0;
|
|
|
2295 |
my $BY_FILE = 0;
|
|
|
2296 |
if ($report_type eq "by language") {
|
|
|
2297 |
$BY_LANGUAGE = 1;
|
|
|
2298 |
} elsif ($report_type eq "by file") {
|
|
|
2299 |
$BY_FILE = 1;
|
|
|
2300 |
}
|
|
|
2301 |
my $DELIM = ",";
|
|
|
2302 |
$DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
|
|
|
2303 |
|
|
|
2304 |
$elapsed_sec = 0.5 unless $elapsed_sec;
|
|
|
2305 |
|
|
|
2306 |
my $line = "Language${DELIM} ";
|
|
|
2307 |
$line = "File${DELIM} " if $BY_FILE;
|
|
|
2308 |
foreach my $item (qw(files blank comment code)) {
|
|
|
2309 |
next if $BY_FILE and $item eq 'files';
|
|
|
2310 |
foreach my $symbol ( '==', '!=', '+', '-', ) {
|
|
|
2311 |
$line .= "$symbol $item${DELIM} ";
|
|
|
2312 |
}
|
|
|
2313 |
}
|
|
|
2314 |
$line .= "\"$URL v $version T=$elapsed_sec s\"";
|
|
|
2315 |
push @results, $line;
|
|
|
2316 |
|
|
|
2317 |
foreach my $lang_or_file (keys %{$rhhh_count}) {
|
|
|
2318 |
$rhhh_count->{$lang_or_file}{'code'}{'added'} = 0 unless
|
|
|
2319 |
defined $rhhh_count->{$lang_or_file}{'code'};
|
|
|
2320 |
}
|
|
|
2321 |
foreach my $lang_or_file (sort {
|
|
|
2322 |
$rhhh_count->{$b}{'code'} <=>
|
|
|
2323 |
$rhhh_count->{$a}{'code'}
|
|
|
2324 |
}
|
|
|
2325 |
keys %{$rhhh_count}) {
|
|
|
2326 |
if ($BY_FILE) {
|
|
|
2327 |
$line = rm_leading_tempdir($lang_or_file, \%TEMP_DIR) . "$DELIM ";
|
|
|
2328 |
} else {
|
|
|
2329 |
$line = $lang_or_file . "${DELIM} ";
|
|
|
2330 |
}
|
|
|
2331 |
if ($opt_by_percent) {
|
|
|
2332 |
foreach my $item (qw(nFiles)) {
|
|
|
2333 |
next if $BY_FILE and $item eq 'nFiles';
|
|
|
2334 |
foreach my $symbol (qw(same modified added removed)) {
|
|
|
2335 |
if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
|
|
|
2336 |
$line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
|
|
|
2337 |
} else {
|
|
|
2338 |
$line .= "0${DELIM} ";
|
|
|
2339 |
}
|
|
|
2340 |
}
|
|
|
2341 |
}
|
|
|
2342 |
foreach my $item (qw(blank comment)) {
|
|
|
2343 |
foreach my $symbol (qw(same modified added removed)) {
|
|
|
2344 |
if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol} and
|
|
|
2345 |
defined $rhhh_count->{$lang_or_file}{'code'}{$symbol} and
|
|
|
2346 |
$rhhh_count->{$lang_or_file}{'code'}{$symbol} > 0) {
|
|
|
2347 |
$line .= sprintf("%.2f", $rhhh_count->{$lang_or_file}{$item}{$symbol} / $rhhh_count->{$lang_or_file}{'code'}{$symbol} * 100).${DELIM};
|
|
|
2348 |
} else {
|
|
|
2349 |
$line .= "0.00${DELIM} ";
|
|
|
2350 |
}
|
|
|
2351 |
}
|
|
|
2352 |
}
|
|
|
2353 |
foreach my $item (qw(code)) {
|
|
|
2354 |
foreach my $symbol (qw(same modified added removed)) {
|
|
|
2355 |
if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
|
|
|
2356 |
$line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
|
|
|
2357 |
} else {
|
|
|
2358 |
$line .= "0${DELIM} ";
|
|
|
2359 |
}
|
|
|
2360 |
}
|
|
|
2361 |
}
|
|
|
2362 |
} else {
|
|
|
2363 |
foreach my $item (qw(nFiles blank comment code)) {
|
|
|
2364 |
next if $BY_FILE and $item eq 'nFiles';
|
|
|
2365 |
foreach my $symbol (qw(same modified added removed)) {
|
|
|
2366 |
if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
|
|
|
2367 |
$line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
|
|
|
2368 |
} else {
|
|
|
2369 |
$line .= "0${DELIM} ";
|
|
|
2370 |
}
|
|
|
2371 |
}
|
|
|
2372 |
}
|
|
|
2373 |
}
|
|
|
2374 |
push @results, $line;
|
|
|
2375 |
}
|
|
|
2376 |
|
|
|
2377 |
print "<- diff_csv_report\n" if $opt_v > 2;
|
|
|
2378 |
return @results;
|
|
|
2379 |
} # 1}}}
|
|
|
2380 |
sub rm_leading_tempdir { # {{{1
|
|
|
2381 |
my ($in_file, $rh_temp_dirs, ) = @_;
|
|
|
2382 |
my $clean_filename = $in_file;
|
|
|
2383 |
foreach my $temp_d (keys %{$rh_temp_dirs}) {
|
|
|
2384 |
if ($ON_WINDOWS) {
|
|
|
2385 |
# \ -> / necessary to allow the next if test's
|
|
|
2386 |
# m{} to work in the presence of spaces in file names
|
|
|
2387 |
$temp_d =~ s{\\}{/}g;
|
|
|
2388 |
$clean_filename =~ s{\\}{/}g;
|
|
|
2389 |
}
|
|
|
2390 |
if ($clean_filename =~ m{^$temp_d/}) {
|
|
|
2391 |
$clean_filename =~ s{^$temp_d/}{};
|
|
|
2392 |
last;
|
|
|
2393 |
}
|
|
|
2394 |
}
|
|
|
2395 |
$clean_filename =~ s{/}{\\}g if $ON_WINDOWS; # then go back from / to \
|
|
|
2396 |
return $clean_filename;
|
|
|
2397 |
} # 1}}}
|
|
|
2398 |
sub generate_sql { # {{{1
|
|
|
2399 |
my ($elapsed_sec, # in
|
|
|
2400 |
$rhh_count , # in count{TYPE}{lang|code|blank|comment|scaled}
|
|
|
2401 |
$rh_scale , # in
|
|
|
2402 |
) = @_;
|
|
|
2403 |
print "-> generate_sql\n" if $opt_v > 2;
|
|
|
2404 |
|
|
|
2405 |
$opt_sql_project = cwd() unless defined $opt_sql_project;
|
|
|
2406 |
$opt_sql_project =~ s{/}{\\}g if $ON_WINDOWS;
|
|
|
2407 |
|
|
|
2408 |
my $schema = undef;
|
|
|
2409 |
if ($opt_sql_style eq "oracle") {
|
|
|
2410 |
$schema = "
|
|
|
2411 |
CREATE TABLE metadata
|
|
|
2412 |
(
|
|
|
2413 |
timestamp TIMESTAMP,
|
|
|
2414 |
project VARCHAR2(500 CHAR),
|
|
|
2415 |
elapsed_s NUMBER(10, 6)
|
|
|
2416 |
)
|
|
|
2417 |
/
|
|
|
2418 |
|
|
|
2419 |
CREATE TABLE t
|
|
|
2420 |
(
|
|
|
2421 |
project VARCHAR2(500 CHAR),
|
|
|
2422 |
language VARCHAR2(500 CHAR),
|
|
|
2423 |
file_fullname VARCHAR2(500 CHAR),
|
|
|
2424 |
file_dirname VARCHAR2(500 CHAR),
|
|
|
2425 |
file_basename VARCHAR2(500 CHAR),
|
|
|
2426 |
nblank INTEGER,
|
|
|
2427 |
ncomment INTEGER,
|
|
|
2428 |
ncode INTEGER,
|
|
|
2429 |
nscaled NUMBER(10, 6)
|
|
|
2430 |
)
|
|
|
2431 |
/
|
|
|
2432 |
|
|
|
2433 |
";
|
|
|
2434 |
} else {
|
|
|
2435 |
$schema = "
|
|
|
2436 |
create table metadata ( -- $URL v $VERSION
|
|
|
2437 |
timestamp varchar(500),
|
|
|
2438 |
Project varchar(500),
|
|
|
2439 |
elapsed_s real);
|
|
|
2440 |
create table t (
|
|
|
2441 |
Project varchar(500) ,
|
|
|
2442 |
Language varchar(500) ,
|
|
|
2443 |
File varchar(500) ,
|
|
|
2444 |
File_dirname varchar(500) ,
|
|
|
2445 |
File_basename varchar(500) ,
|
|
|
2446 |
nBlank integer ,
|
|
|
2447 |
nComment integer ,
|
|
|
2448 |
nCode integer ,
|
|
|
2449 |
nScaled real );
|
|
|
2450 |
";
|
|
|
2451 |
}
|
|
|
2452 |
$opt_sql = "-" if $opt_sql eq "1";
|
|
|
2453 |
|
|
|
2454 |
my $open_mode = ">";
|
|
|
2455 |
$open_mode = ">>" if $opt_sql_append;
|
|
|
2456 |
|
|
|
2457 |
my $fh = new IO::File; # $opt_sql, "w";
|
|
|
2458 |
if (!$fh->open("${open_mode}${opt_sql}")) {
|
|
|
2459 |
die "Unable to write to $opt_sql $!\n";
|
|
|
2460 |
}
|
|
|
2461 |
print $fh $schema unless defined $opt_sql_append;
|
|
|
2462 |
|
|
|
2463 |
if ($opt_sql_style eq "oracle") {
|
|
|
2464 |
printf $fh "insert into metadata values(TO_TIMESTAMP('%s','yyyy-mm-dd hh24:mi:ss'), '%s', %f);\n",
|
|
|
2465 |
strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
|
|
|
2466 |
$opt_sql_project, $elapsed_sec;
|
|
|
2467 |
} else {
|
|
|
2468 |
print $fh "begin transaction;\n";
|
|
|
2469 |
printf $fh "insert into metadata values('%s', '%s', %f);\n",
|
|
|
2470 |
strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
|
|
|
2471 |
$opt_sql_project, $elapsed_sec;
|
|
|
2472 |
}
|
|
|
2473 |
|
|
|
2474 |
my $nIns = 0;
|
|
|
2475 |
foreach my $file (keys %{$rhh_count}) {
|
|
|
2476 |
my $language = $rhh_count->{$file}{'lang'};
|
|
|
2477 |
my $clean_filename = $file;
|
|
|
2478 |
# If necessary (that is, if the input contained an
|
|
|
2479 |
# archive file [.tar.gz, etc]), strip the temporary
|
|
|
2480 |
# directory name which was used to expand the archive
|
|
|
2481 |
# from the file name.
|
|
|
2482 |
|
|
|
2483 |
$clean_filename = rm_leading_tempdir($clean_filename, \%TEMP_DIR);
|
|
|
2484 |
$clean_filename =~ s/\'/''/g; # double embedded single quotes
|
|
|
2485 |
# to escape them
|
|
|
2486 |
|
|
|
2487 |
printf $fh "insert into t values('%s', '%s', '%s', '%s', '%s', " .
|
|
|
2488 |
"%d, %d, %d, %f);\n",
|
|
|
2489 |
$opt_sql_project ,
|
|
|
2490 |
$language ,
|
|
|
2491 |
$clean_filename ,
|
|
|
2492 |
dirname( $clean_filename) ,
|
|
|
2493 |
basename($clean_filename) ,
|
|
|
2494 |
$rhh_count->{$file}{'blank'},
|
|
|
2495 |
$rhh_count->{$file}{'comment'},
|
|
|
2496 |
$rhh_count->{$file}{'code'} ,
|
|
|
2497 |
$rhh_count->{$file}{'code'}*$rh_scale->{$language};
|
|
|
2498 |
|
|
|
2499 |
++$nIns;
|
|
|
2500 |
if (!($nIns % 10_000) and ($opt_sql_style ne "oracle")) {
|
|
|
2501 |
print $fh "commit;\n";
|
|
|
2502 |
print $fh "begin transaction;\n";
|
|
|
2503 |
}
|
|
|
2504 |
}
|
|
|
2505 |
if ($opt_sql_style ne "oracle") {
|
|
|
2506 |
print $fh "commit;\n";
|
|
|
2507 |
}
|
|
|
2508 |
|
|
|
2509 |
$fh->close unless $opt_sql eq "-"; # don't try to close STDOUT
|
|
|
2510 |
print "<- generate_sql\n" if $opt_v > 2;
|
|
|
2511 |
|
|
|
2512 |
# sample query:
|
|
|
2513 |
#
|
|
|
2514 |
# select project, language,
|
|
|
2515 |
# sum(nCode) as Code,
|
|
|
2516 |
# sum(nComment) as Comments,
|
|
|
2517 |
# sum(nBlank) as Blank,
|
|
|
2518 |
# sum(nCode)+sum(nComment)+sum(nBlank) as All_Lines,
|
|
|
2519 |
# 100.0*sum(nComment)/(sum(nCode)+sum(nComment)) as Comment_Pct
|
|
|
2520 |
# from t group by Project, Language order by Project, Code desc;
|
|
|
2521 |
#
|
|
|
2522 |
} # 1}}}
|
|
|
2523 |
sub output_header { # {{{1
|
|
|
2524 |
my ($header_line,
|
|
|
2525 |
$hyphen_line,
|
|
|
2526 |
$BY_FILE ,) = @_;
|
|
|
2527 |
print "-> output_header\n" if $opt_v > 2;
|
|
|
2528 |
my @R = ();
|
|
|
2529 |
if ($opt_xml) {
|
|
|
2530 |
if (!$ALREADY_SHOWED_XML_SECTION) {
|
|
|
2531 |
push @R, "<?xml version=\"1.0\"?>";
|
|
|
2532 |
push @R, '<?xml-stylesheet type="text/xsl" href="' .
|
|
|
2533 |
$opt_xsl . '"?>' if $opt_xsl;
|
|
|
2534 |
push @R, "<results>";
|
|
|
2535 |
push @R, "<header>$header_line</header>";
|
|
|
2536 |
$ALREADY_SHOWED_XML_SECTION = 1;
|
|
|
2537 |
}
|
|
|
2538 |
if ($BY_FILE) {
|
|
|
2539 |
push @R, "<files>";
|
|
|
2540 |
} else {
|
|
|
2541 |
push @R, "<languages>";
|
|
|
2542 |
}
|
|
|
2543 |
} elsif ($opt_yaml) {
|
|
|
2544 |
push @R, "---\n# $header_line";
|
|
|
2545 |
} elsif ($opt_csv) {
|
|
|
2546 |
# append the header to the end of the column headers
|
|
|
2547 |
# to keep the output a bit cleaner from a spreadsheet
|
|
|
2548 |
# perspective
|
|
|
2549 |
} else {
|
|
|
2550 |
if ($ALREADY_SHOWED_HEADER) {
|
|
|
2551 |
push @R, "";
|
|
|
2552 |
} else {
|
|
|
2553 |
push @R, $header_line;
|
|
|
2554 |
$ALREADY_SHOWED_HEADER = 1;
|
|
|
2555 |
}
|
|
|
2556 |
push @R, $hyphen_line;
|
|
|
2557 |
}
|
|
|
2558 |
print "<- output_header\n" if $opt_v > 2;
|
|
|
2559 |
return @R;
|
|
|
2560 |
} # 1}}}
|
|
|
2561 |
sub generate_report { # {{{1
|
|
|
2562 |
# returns an array of lines containing the results
|
|
|
2563 |
my ($version , # in
|
|
|
2564 |
$elapsed_sec, # in
|
|
|
2565 |
$report_type, # in "by language" | "by report file" | "by file"
|
|
|
2566 |
$rhh_count , # in count{TYPE}{nFiles|code|blank|comment|scaled}
|
|
|
2567 |
$rh_scale , # in
|
|
|
2568 |
) = @_;
|
|
|
2569 |
|
|
|
2570 |
print "-> generate_report\n" if $opt_v > 2;
|
|
|
2571 |
my $DELIM = ",";
|
|
|
2572 |
$DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
|
|
|
2573 |
|
|
|
2574 |
my @results = ();
|
|
|
2575 |
|
|
|
2576 |
my $languages = ();
|
|
|
2577 |
|
|
|
2578 |
my $sum_files = 0;
|
|
|
2579 |
my $sum_code = 0;
|
|
|
2580 |
my $sum_blank = 0;
|
|
|
2581 |
my $sum_comment = 0;
|
|
|
2582 |
my $max_len = 0;
|
|
|
2583 |
foreach my $language (keys %{$rhh_count}) {
|
|
|
2584 |
$sum_files += $rhh_count->{$language}{'nFiles'} ;
|
|
|
2585 |
$sum_blank += $rhh_count->{$language}{'blank'} ;
|
|
|
2586 |
$sum_comment += $rhh_count->{$language}{'comment'};
|
|
|
2587 |
$sum_code += $rhh_count->{$language}{'code'} ;
|
|
|
2588 |
$max_len = length($language) if length($language) > $max_len;
|
|
|
2589 |
}
|
|
|
2590 |
my $column_1_offset = 0;
|
|
|
2591 |
$column_1_offset = $max_len - 17 if $max_len > 17;
|
|
|
2592 |
my $sum_lines = $sum_blank + $sum_comment + $sum_code;
|
|
|
2593 |
$elapsed_sec = 0.5 unless $elapsed_sec;
|
|
|
2594 |
|
|
|
2595 |
my $spacing_0 = 23;
|
|
|
2596 |
my $spacing_1 = 13;
|
|
|
2597 |
my $spacing_2 = 9;
|
|
|
2598 |
my $spacing_3 = 17;
|
|
|
2599 |
if (!$opt_3) {
|
|
|
2600 |
$spacing_1 = 19;
|
|
|
2601 |
$spacing_2 = 14;
|
|
|
2602 |
$spacing_3 = 28;
|
|
|
2603 |
}
|
|
|
2604 |
$spacing_0 += $column_1_offset;
|
|
|
2605 |
$spacing_1 += $column_1_offset;
|
|
|
2606 |
$spacing_3 += $column_1_offset;
|
|
|
2607 |
my %Format = (
|
|
|
2608 |
'1' => { 'xml' => 'name="%s" ',
|
|
|
2609 |
'txt' => "\%-${spacing_0}s ",
|
|
|
2610 |
},
|
|
|
2611 |
'2' => { 'xml' => 'name="%s" ',
|
|
|
2612 |
'txt' => "\%-${spacing_3}s ",
|
|
|
2613 |
},
|
|
|
2614 |
'3' => { 'xml' => 'files_count="%d" ',
|
|
|
2615 |
'txt' => '%5d ',
|
|
|
2616 |
},
|
|
|
2617 |
'4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
|
|
|
2618 |
'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
|
|
|
2619 |
},
|
|
|
2620 |
'5' => { 'xml' => 'blank="%3.2f" comment="%3.2f" code="%d" ',
|
|
|
2621 |
'txt' => "\%14.2f \%14.2f \%${spacing_2}d",
|
|
|
2622 |
},
|
|
|
2623 |
'6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
|
|
|
2624 |
'txt' => ' x %6.2f = %14.2f',
|
|
|
2625 |
},
|
|
|
2626 |
);
|
|
|
2627 |
my $Style = "txt";
|
|
|
2628 |
$Style = "xml" if $opt_xml ;
|
|
|
2629 |
$Style = "xml" if $opt_yaml; # not a typo; just set to anything but txt
|
|
|
2630 |
$Style = "xml" if $opt_csv ; # not a typo; just set to anything but txt
|
|
|
2631 |
|
|
|
2632 |
my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
|
|
|
2633 |
$hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
|
|
|
2634 |
if (!$opt_3) and (68 + $column_1_offset) > 79;
|
|
|
2635 |
my $data_line = "";
|
|
|
2636 |
my $first_column;
|
|
|
2637 |
my $BY_LANGUAGE = 0;
|
|
|
2638 |
my $BY_FILE = 0;
|
|
|
2639 |
if ($report_type eq "by language") {
|
|
|
2640 |
$first_column = "Language";
|
|
|
2641 |
$BY_LANGUAGE = 1;
|
|
|
2642 |
} elsif ($report_type eq "by file") {
|
|
|
2643 |
$first_column = "File";
|
|
|
2644 |
$BY_FILE = 1;
|
|
|
2645 |
} elsif ($report_type eq "by report file") {
|
|
|
2646 |
$first_column = "File";
|
|
|
2647 |
} else {
|
|
|
2648 |
$first_column = "Report File";
|
|
|
2649 |
}
|
|
|
2650 |
|
|
|
2651 |
my $header_line = sprintf "%s v %s", $URL, $version;
|
|
|
2652 |
$header_line .= sprintf(" T=%.2f s (%.1f files/s, %.1f lines/s)",
|
|
|
2653 |
$elapsed_sec ,
|
|
|
2654 |
$sum_files/$elapsed_sec,
|
|
|
2655 |
$sum_lines/$elapsed_sec) unless $opt_sum_reports;
|
|
|
2656 |
if ($opt_xml or $opt_yaml) {
|
|
|
2657 |
if (!$ALREADY_SHOWED_HEADER) {
|
|
|
2658 |
push @results, xml_or_yaml_header($URL, $version, $elapsed_sec,
|
|
|
2659 |
$sum_files, $sum_lines, $BY_FILE);
|
|
|
2660 |
$ALREADY_SHOWED_HEADER = 1 unless $opt_sum_reports;
|
|
|
2661 |
# --sum-reports yields two xml or yaml files, one by
|
|
|
2662 |
# language and one by report file, each of which needs a header
|
|
|
2663 |
}
|
|
|
2664 |
if ($opt_xml) {
|
|
|
2665 |
if ($BY_FILE or ($report_type eq "by report file")) {
|
|
|
2666 |
push @results, "<files>";
|
|
|
2667 |
} else {
|
|
|
2668 |
push @results, "<languages>";
|
|
|
2669 |
}
|
|
|
2670 |
}
|
|
|
2671 |
} else {
|
|
|
2672 |
push @results, output_header($header_line, $hyphen_line, $BY_FILE);
|
|
|
2673 |
}
|
|
|
2674 |
|
|
|
2675 |
if ($Style eq "txt") {
|
|
|
2676 |
# column headers
|
|
|
2677 |
if (!$opt_3 and $BY_FILE) {
|
|
|
2678 |
my $spacing_n = $spacing_1 - 11;
|
|
|
2679 |
$data_line = sprintf "%-${spacing_n}s ", $first_column;
|
|
|
2680 |
} else {
|
|
|
2681 |
$data_line = sprintf "%-${spacing_1}s ", $first_column;
|
|
|
2682 |
}
|
|
|
2683 |
if ($BY_FILE) {
|
|
|
2684 |
$data_line .= sprintf "%${spacing_2}s " , " " ;
|
|
|
2685 |
} else {
|
|
|
2686 |
$data_line .= sprintf "%${spacing_2}s " , "files";
|
|
|
2687 |
}
|
|
|
2688 |
my $PCT_symbol = "";
|
|
|
2689 |
$PCT_symbol = " \%" if $opt_by_percent;
|
|
|
2690 |
$data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
|
|
|
2691 |
"blank${PCT_symbol}" ,
|
|
|
2692 |
"comment${PCT_symbol}" ,
|
|
|
2693 |
"code";
|
|
|
2694 |
$data_line .= sprintf " %8s %14s",
|
|
|
2695 |
"scale" ,
|
|
|
2696 |
"3rd gen. equiv"
|
|
|
2697 |
if $opt_3;
|
|
|
2698 |
push @results, $data_line;
|
|
|
2699 |
push @results, $hyphen_line;
|
|
|
2700 |
}
|
|
|
2701 |
if ($opt_csv) {
|
|
|
2702 |
my $header2;
|
|
|
2703 |
if ($BY_FILE) {
|
|
|
2704 |
$header2 = "language${DELIM}filename";
|
|
|
2705 |
} else {
|
|
|
2706 |
$header2 = "files${DELIM}language";
|
|
|
2707 |
}
|
|
|
2708 |
$header2 .= "${DELIM}blank${DELIM}comment${DELIM}code";
|
|
|
2709 |
$header2 .= "${DELIM}scale${DELIM}3rd gen. equiv" if $opt_3;
|
|
|
2710 |
$header2 .= ${DELIM} . '"' . $header_line . '"';
|
|
|
2711 |
push @results, $header2;
|
|
|
2712 |
}
|
|
|
2713 |
|
|
|
2714 |
my $sum_scaled = 0;
|
|
|
2715 |
####foreach my $lang_or_file (keys %{$rhh_count}) {
|
|
|
2716 |
#### $rhh_count->{$lang_or_file}{'code'} = 0 unless
|
|
|
2717 |
#### defined $rhh_count->{$lang_or_file}{'code'};
|
|
|
2718 |
####}
|
|
|
2719 |
foreach my $lang_or_file (sort {
|
|
|
2720 |
$rhh_count->{$b}{'code'} <=>
|
|
|
2721 |
$rhh_count->{$a}{'code'}
|
|
|
2722 |
}
|
|
|
2723 |
keys %{$rhh_count}) {
|
|
|
2724 |
next if $lang_or_file eq "by report file";
|
|
|
2725 |
my ($factor, $scaled);
|
|
|
2726 |
if ($BY_LANGUAGE or $BY_FILE) {
|
|
|
2727 |
$factor = 1;
|
|
|
2728 |
if ($BY_LANGUAGE) {
|
|
|
2729 |
if (defined $rh_scale->{$lang_or_file}) {
|
|
|
2730 |
$factor = $rh_scale->{$lang_or_file};
|
|
|
2731 |
} else {
|
|
|
2732 |
warn "No scale factor for $lang_or_file; using 1.00";
|
|
|
2733 |
}
|
|
|
2734 |
} else { # by individual code file
|
|
|
2735 |
if ($report_type ne "by report file") {
|
|
|
2736 |
next unless defined $rhh_count->{$lang_or_file}{'lang'};
|
|
|
2737 |
next unless defined $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
|
|
|
2738 |
$factor = $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
|
|
|
2739 |
}
|
|
|
2740 |
}
|
|
|
2741 |
$scaled = $factor*$rhh_count->{$lang_or_file}{'code'};
|
|
|
2742 |
} else {
|
|
|
2743 |
if (!defined $rhh_count->{$lang_or_file}{'scaled'}) {
|
|
|
2744 |
$opt_3 = 0;
|
|
|
2745 |
# If we're summing together files previously generated
|
|
|
2746 |
# with --no3 then rhh_count->{$lang_or_file}{'scaled'}
|
|
|
2747 |
# this variable will be undefined. That should only
|
|
|
2748 |
# happen when summing together by file however.
|
|
|
2749 |
} elsif ($BY_LANGUAGE) {
|
|
|
2750 |
warn "Missing scaled language info for $lang_or_file\n";
|
|
|
2751 |
}
|
|
|
2752 |
if ($opt_3) {
|
|
|
2753 |
$scaled = $rhh_count->{$lang_or_file}{'scaled'};
|
|
|
2754 |
$factor = $scaled/$rhh_count->{$lang_or_file}{'code'};
|
|
|
2755 |
}
|
|
|
2756 |
}
|
|
|
2757 |
|
|
|
2758 |
if ($BY_FILE) {
|
|
|
2759 |
my $clean_filename = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
|
|
|
2760 |
$clean_filename = xml_metachars($clean_filename) if $opt_xml;
|
|
|
2761 |
$data_line = sprintf $Format{'1'}{$Style}, $clean_filename;
|
|
|
2762 |
} else {
|
|
|
2763 |
$data_line = sprintf $Format{'2'}{$Style}, $lang_or_file;
|
|
|
2764 |
}
|
|
|
2765 |
$data_line .= sprintf $Format{3}{$Style} ,
|
|
|
2766 |
$rhh_count->{$lang_or_file}{'nFiles'} unless $BY_FILE;
|
|
|
2767 |
if ($opt_by_percent) {
|
|
|
2768 |
my $DEN = compute_denominator($opt_by_percent ,
|
|
|
2769 |
$rhh_count->{$lang_or_file}{'code'} ,
|
|
|
2770 |
$rhh_count->{$lang_or_file}{'comment'},
|
|
|
2771 |
$rhh_count->{$lang_or_file}{'blank'} );
|
|
|
2772 |
$data_line .= sprintf $Format{5}{$Style} ,
|
|
|
2773 |
$rhh_count->{$lang_or_file}{'blank'} / $DEN * 100,
|
|
|
2774 |
$rhh_count->{$lang_or_file}{'comment'} / $DEN * 100,
|
|
|
2775 |
$rhh_count->{$lang_or_file}{'code'} ;
|
|
|
2776 |
} else {
|
|
|
2777 |
$data_line .= sprintf $Format{4}{$Style} ,
|
|
|
2778 |
$rhh_count->{$lang_or_file}{'blank'} ,
|
|
|
2779 |
$rhh_count->{$lang_or_file}{'comment'},
|
|
|
2780 |
$rhh_count->{$lang_or_file}{'code'} ;
|
|
|
2781 |
}
|
|
|
2782 |
$data_line .= sprintf $Format{6}{$Style} ,
|
|
|
2783 |
$factor ,
|
|
|
2784 |
$scaled if $opt_3;
|
|
|
2785 |
$sum_scaled += $scaled if $opt_3;
|
|
|
2786 |
|
|
|
2787 |
if ($opt_xml) {
|
|
|
2788 |
if (defined $rhh_count->{$lang_or_file}{'lang'}) {
|
|
|
2789 |
my $lang = $rhh_count->{$lang_or_file}{'lang'};
|
|
|
2790 |
if (!defined $languages->{$lang}) {
|
|
|
2791 |
$languages->{$lang} = $lang;
|
|
|
2792 |
}
|
|
|
2793 |
$data_line.=' language="' . $lang . '" ';
|
|
|
2794 |
}
|
|
|
2795 |
if ($BY_FILE or ($report_type eq "by report file")) {
|
|
|
2796 |
push @results, " <file " . $data_line . "/>";
|
|
|
2797 |
} else {
|
|
|
2798 |
push @results, " <language " . $data_line . "/>";
|
|
|
2799 |
}
|
|
|
2800 |
} elsif ($opt_yaml) {
|
|
|
2801 |
push @results,$lang_or_file . ":";
|
|
|
2802 |
push @results," nFiles: " .$rhh_count->{$lang_or_file}{'nFiles'}
|
|
|
2803 |
unless $BY_FILE;
|
|
|
2804 |
if ($opt_by_percent) {
|
|
|
2805 |
my $DEN = compute_denominator($opt_by_percent ,
|
|
|
2806 |
$rhh_count->{$lang_or_file}{'code'} ,
|
|
|
2807 |
$rhh_count->{$lang_or_file}{'comment'},
|
|
|
2808 |
$rhh_count->{$lang_or_file}{'blank'} );
|
|
|
2809 |
push @results," blank: " . sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'} / $DEN * 100);
|
|
|
2810 |
push @results," comment: " . sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100);
|
|
|
2811 |
push @results," code: " . $rhh_count->{$lang_or_file}{'code'} ;
|
|
|
2812 |
} else {
|
|
|
2813 |
push @results," blank: " . $rhh_count->{$lang_or_file}{'blank'} ;
|
|
|
2814 |
push @results," comment: " . $rhh_count->{$lang_or_file}{'comment'};
|
|
|
2815 |
push @results," code: " . $rhh_count->{$lang_or_file}{'code'} ;
|
|
|
2816 |
}
|
|
|
2817 |
push @results," language: ".$rhh_count->{$lang_or_file}{'lang'}
|
|
|
2818 |
if $BY_FILE;
|
|
|
2819 |
if ($opt_3) {
|
|
|
2820 |
push @results, " scaled: " . $scaled;
|
|
|
2821 |
push @results, " factor: " . $factor;
|
|
|
2822 |
}
|
|
|
2823 |
} elsif ($opt_csv) {
|
|
|
2824 |
my $extra_3 = "";
|
|
|
2825 |
$extra_3 = "${DELIM}$factor${DELIM}$scaled" if $opt_3;
|
|
|
2826 |
my $first_column = undef;
|
|
|
2827 |
my $clean_name = $lang_or_file;
|
|
|
2828 |
if ($BY_FILE) {
|
|
|
2829 |
$first_column = $rhh_count->{$lang_or_file}{'lang'};
|
|
|
2830 |
$clean_name = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
|
|
|
2831 |
} else {
|
|
|
2832 |
$first_column = $rhh_count->{$lang_or_file}{'nFiles'};
|
|
|
2833 |
}
|
|
|
2834 |
my $str = $first_column . ${DELIM} .
|
|
|
2835 |
$clean_name . ${DELIM};
|
|
|
2836 |
if ($opt_by_percent) {
|
|
|
2837 |
my $DEN = compute_denominator($opt_by_percent ,
|
|
|
2838 |
$rhh_count->{$lang_or_file}{'code'}{'code'} ,
|
|
|
2839 |
$rhh_count->{$lang_or_file}{'code'}{'comment'},
|
|
|
2840 |
$rhh_count->{$lang_or_file}{'code'}{'blank'} );
|
|
|
2841 |
$str .= sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'} / $DEN * 100) . ${DELIM} .
|
|
|
2842 |
sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . ${DELIM} .
|
|
|
2843 |
$rhh_count->{$lang_or_file}{'code'};
|
|
|
2844 |
} else {
|
|
|
2845 |
$str .= $rhh_count->{$lang_or_file}{'blank'} . ${DELIM} .
|
|
|
2846 |
$rhh_count->{$lang_or_file}{'comment'}. ${DELIM} .
|
|
|
2847 |
$rhh_count->{$lang_or_file}{'code'};
|
|
|
2848 |
}
|
|
|
2849 |
$str .= $extra_3;
|
|
|
2850 |
push @results, $str;
|
|
|
2851 |
} else {
|
|
|
2852 |
push @results, $data_line;
|
|
|
2853 |
}
|
|
|
2854 |
}
|
|
|
2855 |
|
|
|
2856 |
my $avg_scale = 1; # weighted average of scale factors
|
|
|
2857 |
$avg_scale = sprintf("%.2f", $sum_scaled / $sum_code)
|
|
|
2858 |
if $sum_code and $opt_3;
|
|
|
2859 |
|
|
|
2860 |
if ($opt_xml) {
|
|
|
2861 |
$data_line = "";
|
|
|
2862 |
if (!$BY_FILE) {
|
|
|
2863 |
$data_line .= sprintf "sum_files=\"%d\" ", $sum_files;
|
|
|
2864 |
}
|
|
|
2865 |
if ($opt_by_percent) {
|
|
|
2866 |
my $DEN = compute_denominator($opt_by_percent ,
|
|
|
2867 |
$sum_code, $sum_comment, $sum_blank);
|
|
|
2868 |
$data_line .= sprintf $Format{'5'}{$Style},
|
|
|
2869 |
$sum_blank / $DEN * 100,
|
|
|
2870 |
$sum_comment / $DEN * 100,
|
|
|
2871 |
$sum_code ;
|
|
|
2872 |
} else {
|
|
|
2873 |
$data_line .= sprintf $Format{'4'}{$Style},
|
|
|
2874 |
$sum_blank ,
|
|
|
2875 |
$sum_comment ,
|
|
|
2876 |
$sum_code ;
|
|
|
2877 |
}
|
|
|
2878 |
$data_line .= sprintf $Format{'6'}{$Style},
|
|
|
2879 |
$avg_scale ,
|
|
|
2880 |
$sum_scaled if $opt_3;
|
|
|
2881 |
push @results, " <total " . $data_line . "/>";
|
|
|
2882 |
|
|
|
2883 |
if ($BY_FILE or ($report_type eq "by report file")) {
|
|
|
2884 |
push @results, "</files>";
|
|
|
2885 |
} else {
|
|
|
2886 |
foreach my $language (keys %{$languages}) {
|
|
|
2887 |
push @results, ' <language name="' . $language . '"/>';
|
|
|
2888 |
}
|
|
|
2889 |
push @results, "</languages>";
|
|
|
2890 |
}
|
|
|
2891 |
|
|
|
2892 |
if (!$opt_by_file_by_lang or $ALREADY_SHOWED_XML_SECTION) {
|
|
|
2893 |
push @results, "</results>";
|
|
|
2894 |
} else {
|
|
|
2895 |
$ALREADY_SHOWED_XML_SECTION = 1;
|
|
|
2896 |
}
|
|
|
2897 |
} elsif ($opt_yaml) {
|
|
|
2898 |
push @results, "SUM:";
|
|
|
2899 |
if ($opt_by_percent) {
|
|
|
2900 |
my $DEN = compute_denominator($opt_by_percent ,
|
|
|
2901 |
$sum_code, $sum_comment, $sum_blank);
|
|
|
2902 |
push @results, " blank: " . sprintf("%.2f", $sum_blank / $DEN * 100);
|
|
|
2903 |
push @results, " comment: ". sprintf("%.2f", $sum_comment / $DEN * 100);
|
|
|
2904 |
push @results, " code: " . $sum_code ;
|
|
|
2905 |
} else {
|
|
|
2906 |
push @results, " blank: " . $sum_blank ;
|
|
|
2907 |
push @results, " comment: ". $sum_comment;
|
|
|
2908 |
push @results, " code: " . $sum_code ;
|
|
|
2909 |
}
|
|
|
2910 |
push @results, " nFiles: " . $sum_files ;
|
|
|
2911 |
if ($opt_3) {
|
|
|
2912 |
push @results, " scaled: " . $sum_scaled;
|
|
|
2913 |
push @results, " factor: " . $avg_scale ;
|
|
|
2914 |
}
|
|
|
2915 |
} elsif ($opt_csv) {
|
|
|
2916 |
# do nothing
|
|
|
2917 |
} else {
|
|
|
2918 |
|
|
|
2919 |
if ($BY_FILE) {
|
|
|
2920 |
$data_line = sprintf "%-${spacing_0}s ", "SUM:" ;
|
|
|
2921 |
} else {
|
|
|
2922 |
$data_line = sprintf "%-${spacing_1}s ", "SUM:" ;
|
|
|
2923 |
$data_line .= sprintf "%${spacing_2}d ", $sum_files;
|
|
|
2924 |
}
|
|
|
2925 |
if ($opt_by_percent) {
|
|
|
2926 |
my $DEN = compute_denominator($opt_by_percent ,
|
|
|
2927 |
$sum_code, $sum_comment, $sum_blank);
|
|
|
2928 |
$data_line .= sprintf $Format{'5'}{$Style},
|
|
|
2929 |
$sum_blank / $DEN * 100,
|
|
|
2930 |
$sum_comment / $DEN * 100,
|
|
|
2931 |
$sum_code ;
|
|
|
2932 |
} else {
|
|
|
2933 |
$data_line .= sprintf $Format{'4'}{$Style},
|
|
|
2934 |
$sum_blank ,
|
|
|
2935 |
$sum_comment ,
|
|
|
2936 |
$sum_code ;
|
|
|
2937 |
}
|
|
|
2938 |
$data_line .= sprintf $Format{'6'}{$Style},
|
|
|
2939 |
$avg_scale ,
|
|
|
2940 |
$sum_scaled if $opt_3;
|
|
|
2941 |
push @results, $hyphen_line if $sum_files > 1 or $opt_sum_one;
|
|
|
2942 |
push @results, $data_line if $sum_files > 1 or $opt_sum_one;
|
|
|
2943 |
push @results, $hyphen_line;
|
|
|
2944 |
}
|
|
|
2945 |
write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
|
|
|
2946 |
print "<- generate_report\n" if $opt_v > 2;
|
|
|
2947 |
return @results;
|
|
|
2948 |
} # 1}}}
|
|
|
2949 |
sub print_errors { # {{{1
|
|
|
2950 |
my ($rh_Error_Codes, # in
|
|
|
2951 |
$raa_errors , # in
|
|
|
2952 |
) = @_;
|
|
|
2953 |
|
|
|
2954 |
print "-> print_errors\n" if $opt_v > 2;
|
|
|
2955 |
my %error_string = reverse(%{$rh_Error_Codes});
|
|
|
2956 |
my $nErrors = scalar @{$raa_errors};
|
|
|
2957 |
warn sprintf "\n%d error%s:\n", plural_form(scalar @Errors);
|
|
|
2958 |
for (my $i = 0; $i < $nErrors; $i++) {
|
|
|
2959 |
warn sprintf "%s: %s\n",
|
|
|
2960 |
$error_string{ $raa_errors->[$i][0] },
|
|
|
2961 |
$raa_errors->[$i][1] ;
|
|
|
2962 |
}
|
|
|
2963 |
print "<- print_errors\n" if $opt_v > 2;
|
|
|
2964 |
|
|
|
2965 |
} # 1}}}
|
|
|
2966 |
sub write_lang_def { # {{{1
|
|
|
2967 |
my ($file ,
|
|
|
2968 |
$rh_Language_by_Extension , # in
|
|
|
2969 |
$rh_Language_by_Script , # in
|
|
|
2970 |
$rh_Language_by_File , # in
|
|
|
2971 |
$rhaa_Filters_by_Language , # in
|
|
|
2972 |
$rh_Not_Code_Extension , # in
|
|
|
2973 |
$rh_Not_Code_Filename , # in
|
|
|
2974 |
$rh_Scale_Factor , # in
|
|
|
2975 |
$rh_EOL_Continuation_re , # in
|
|
|
2976 |
) = @_;
|
|
|
2977 |
|
|
|
2978 |
print "-> write_lang_def($file)\n" if $opt_v > 2;
|
|
|
2979 |
my $OUT = new IO::File $file, "w";
|
|
|
2980 |
die "Unable to write to $file\n" unless defined $OUT;
|
|
|
2981 |
|
|
|
2982 |
foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
|
|
|
2983 |
next if $language eq "MATLAB/Objective C/MUMPS/Mercury" or
|
|
|
2984 |
$language eq "PHP/Pascal" or
|
|
|
2985 |
$language eq "Pascal/Puppet" or
|
|
|
2986 |
$language eq "Lisp/OpenCL" or
|
|
|
2987 |
$language eq "Lisp/Julia" or
|
|
|
2988 |
$language eq "Perl/Prolog" or
|
|
|
2989 |
$language eq "D/dtrace" or
|
|
|
2990 |
$language eq "IDL/Qt Project/Prolog" or
|
|
|
2991 |
$language eq "(unknown)";
|
|
|
2992 |
printf $OUT "%s\n", $language;
|
|
|
2993 |
foreach my $filter (@{$rhaa_Filters_by_Language->{$language}}) {
|
|
|
2994 |
printf $OUT " filter %s", $filter->[0];
|
|
|
2995 |
printf $OUT " %s", $filter->[1] if defined $filter->[1];
|
|
|
2996 |
# $filter->[0] == 'remove_between_general',
|
|
|
2997 |
# 'remove_between_regex', and
|
|
|
2998 |
# 'remove_matches_2re' have two args
|
|
|
2999 |
printf $OUT " %s", $filter->[2] if defined $filter->[2];
|
|
|
3000 |
print $OUT "\n";
|
|
|
3001 |
}
|
|
|
3002 |
foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
|
|
|
3003 |
if ($language eq $rh_Language_by_Extension->{$ext}) {
|
|
|
3004 |
printf $OUT " extension %s\n", $ext;
|
|
|
3005 |
}
|
|
|
3006 |
}
|
|
|
3007 |
foreach my $filename (sort keys %{$rh_Language_by_File}) {
|
|
|
3008 |
if ($language eq $rh_Language_by_File->{$filename}) {
|
|
|
3009 |
printf $OUT " filename %s\n", $filename;
|
|
|
3010 |
}
|
|
|
3011 |
}
|
|
|
3012 |
foreach my $script_exe (sort keys %{$rh_Language_by_Script}) {
|
|
|
3013 |
if ($language eq $rh_Language_by_Script->{$script_exe}) {
|
|
|
3014 |
printf $OUT " script_exe %s\n", $script_exe;
|
|
|
3015 |
}
|
|
|
3016 |
}
|
|
|
3017 |
printf $OUT " 3rd_gen_scale %.2f\n", $rh_Scale_Factor->{$language};
|
|
|
3018 |
if (defined $rh_EOL_Continuation_re->{$language}) {
|
|
|
3019 |
printf $OUT " end_of_line_continuation %s\n",
|
|
|
3020 |
$rh_EOL_Continuation_re->{$language};
|
|
|
3021 |
}
|
|
|
3022 |
}
|
|
|
3023 |
|
|
|
3024 |
$OUT->close;
|
|
|
3025 |
print "<- write_lang_def\n" if $opt_v > 2;
|
|
|
3026 |
} # 1}}}
|
|
|
3027 |
sub read_lang_def { # {{{1
|
|
|
3028 |
my ($file ,
|
|
|
3029 |
$rh_Language_by_Extension , # out
|
|
|
3030 |
$rh_Language_by_Script , # out
|
|
|
3031 |
$rh_Language_by_File , # out
|
|
|
3032 |
$rhaa_Filters_by_Language , # out
|
|
|
3033 |
$rh_Not_Code_Extension , # out
|
|
|
3034 |
$rh_Not_Code_Filename , # out
|
|
|
3035 |
$rh_Scale_Factor , # out
|
|
|
3036 |
$rh_EOL_Continuation_re , # out
|
|
|
3037 |
$rh_EOL_abc,
|
|
|
3038 |
) = @_;
|
|
|
3039 |
|
|
|
3040 |
|
|
|
3041 |
print "-> read_lang_def($file)\n" if $opt_v > 2;
|
|
|
3042 |
my $IN = new IO::File $file, "r";
|
|
|
3043 |
die "Unable to read $file.\n" unless defined $IN;
|
|
|
3044 |
|
|
|
3045 |
my $language = "";
|
|
|
3046 |
while (<$IN>) {
|
|
|
3047 |
next if /^\s*#/ or /^\s*$/;
|
|
|
3048 |
|
|
|
3049 |
if (/^(\w+.*?)\s*$/) {
|
|
|
3050 |
$language = $1;
|
|
|
3051 |
next;
|
|
|
3052 |
}
|
|
|
3053 |
die "Missing computer language name, line $. of $file\n"
|
|
|
3054 |
unless $language;
|
|
|
3055 |
|
|
|
3056 |
if (/^\s{4}filter\s+(remove_between_(general|2re|regex))
|
|
|
3057 |
\s+(\S+)\s+(\S+)s*$/x) {
|
|
|
3058 |
push @{$rhaa_Filters_by_Language->{$language}}, [
|
|
|
3059 |
$1 , $3 , $4 ]
|
|
|
3060 |
|
|
|
3061 |
} elsif (/^\s{4}filter\s+(\w+)\s*$/) {
|
|
|
3062 |
push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
|
|
|
3063 |
|
|
|
3064 |
} elsif (/^\s{4}filter\s+(\w+)\s+(.*?)\s*$/) {
|
|
|
3065 |
push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
|
|
|
3066 |
|
|
|
3067 |
} elsif (/^\s{4}extension\s+(\S+)\s*$/) {
|
|
|
3068 |
if (defined $rh_Language_by_Extension->{$1}) {
|
|
|
3069 |
die "File extension collision: $1 ",
|
|
|
3070 |
"maps to languages '$rh_Language_by_Extension->{$1}' ",
|
|
|
3071 |
"and '$language'\n" ,
|
|
|
3072 |
"Edit $file and remove $1 from one of these two ",
|
|
|
3073 |
"language definitions.\n";
|
|
|
3074 |
}
|
|
|
3075 |
$rh_Language_by_Extension->{$1} = $language;
|
|
|
3076 |
|
|
|
3077 |
} elsif (/^\s{4}filename\s+(\S+)\s*$/) {
|
|
|
3078 |
$rh_Language_by_File->{$1} = $language;
|
|
|
3079 |
|
|
|
3080 |
} elsif (/^\s{4}script_exe\s+(\S+)\s*$/) {
|
|
|
3081 |
$rh_Language_by_Script->{$1} = $language;
|
|
|
3082 |
|
|
|
3083 |
} elsif (/^\s{4}3rd_gen_scale\s+(\S+)\s*$/) {
|
|
|
3084 |
$rh_Scale_Factor->{$language} = $1;
|
|
|
3085 |
|
|
|
3086 |
} elsif (/^\s{4}end_of_line_continuation\s+(\S+)\s*$/) {
|
|
|
3087 |
$rh_EOL_Continuation_re->{$language} = $1;
|
|
|
3088 |
|
|
|
3089 |
} else {
|
|
|
3090 |
die "Unexpected data line $. of $file:\n$_\n";
|
|
|
3091 |
}
|
|
|
3092 |
|
|
|
3093 |
}
|
|
|
3094 |
$IN->close;
|
|
|
3095 |
print "<- read_lang_def\n" if $opt_v > 2;
|
|
|
3096 |
} # 1}}}
|
|
|
3097 |
sub merge_lang_def { # {{{1
|
|
|
3098 |
my ($file ,
|
|
|
3099 |
$rh_Language_by_Extension , # in/out
|
|
|
3100 |
$rh_Language_by_Script , # in/out
|
|
|
3101 |
$rh_Language_by_File , # in/out
|
|
|
3102 |
$rhaa_Filters_by_Language , # in/out
|
|
|
3103 |
$rh_Not_Code_Extension , # in/out
|
|
|
3104 |
$rh_Not_Code_Filename , # in/out
|
|
|
3105 |
$rh_Scale_Factor , # in/out
|
|
|
3106 |
$rh_EOL_Continuation_re , # in/out
|
|
|
3107 |
$rh_EOL_abc,
|
|
|
3108 |
) = @_;
|
|
|
3109 |
|
|
|
3110 |
|
|
|
3111 |
print "-> merge_lang_def($file)\n" if $opt_v > 2;
|
|
|
3112 |
my $IN = new IO::File $file, "r";
|
|
|
3113 |
die "Unable to read $file.\n" unless defined $IN;
|
|
|
3114 |
|
|
|
3115 |
my $language = "";
|
|
|
3116 |
my $already_know_it = undef;
|
|
|
3117 |
while (<$IN>) {
|
|
|
3118 |
next if /^\s*#/ or /^\s*$/;
|
|
|
3119 |
|
|
|
3120 |
if (/^(\w+.*?)\s*$/) {
|
|
|
3121 |
$language = $1;
|
|
|
3122 |
$already_know_it = defined $rh_Scale_Factor->{$language};
|
|
|
3123 |
next;
|
|
|
3124 |
}
|
|
|
3125 |
die "Missing computer language name, line $. of $file\n"
|
|
|
3126 |
unless $language;
|
|
|
3127 |
|
|
|
3128 |
if (/^ filter\s+(\w+)\s*$/) {
|
|
|
3129 |
next if $already_know_it;
|
|
|
3130 |
push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
|
|
|
3131 |
|
|
|
3132 |
} elsif (/^ filter\s+(\w+)\s+(.*?)\s*$/) {
|
|
|
3133 |
next if $already_know_it;
|
|
|
3134 |
push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
|
|
|
3135 |
|
|
|
3136 |
} elsif (/^ extension\s+(\S+)\s*$/) {
|
|
|
3137 |
next if $already_know_it;
|
|
|
3138 |
if (defined $rh_Language_by_Extension->{$1}) {
|
|
|
3139 |
die "File extension collision: $1 ",
|
|
|
3140 |
"maps to languages '$rh_Language_by_Extension->{$1}' ",
|
|
|
3141 |
"and '$language'\n" ,
|
|
|
3142 |
"Edit $file and remove $1 from one of these two ",
|
|
|
3143 |
"language definitions.\n";
|
|
|
3144 |
}
|
|
|
3145 |
$rh_Language_by_Extension->{$1} = $language;
|
|
|
3146 |
|
|
|
3147 |
} elsif (/^ filename\s+(\S+)\s*$/) {
|
|
|
3148 |
next if $already_know_it;
|
|
|
3149 |
$rh_Language_by_File->{$1} = $language;
|
|
|
3150 |
|
|
|
3151 |
} elsif (/^ script_exe\s+(\S+)\s*$/) {
|
|
|
3152 |
next if $already_know_it;
|
|
|
3153 |
$rh_Language_by_Script->{$1} = $language;
|
|
|
3154 |
|
|
|
3155 |
} elsif (/^ 3rd_gen_scale\s+(\S+)\s*$/) {
|
|
|
3156 |
next if $already_know_it;
|
|
|
3157 |
$rh_Scale_Factor->{$language} = $1;
|
|
|
3158 |
|
|
|
3159 |
} elsif (/^ end_of_line_continuation\s+(\S+)\s*$/) {
|
|
|
3160 |
next if $already_know_it;
|
|
|
3161 |
$rh_EOL_Continuation_re->{$language} = $1;
|
|
|
3162 |
|
|
|
3163 |
} else {
|
|
|
3164 |
die "Unexpected data line $. of $file:\n$_\n";
|
|
|
3165 |
}
|
|
|
3166 |
|
|
|
3167 |
}
|
|
|
3168 |
$IN->close;
|
|
|
3169 |
print "<- merge_lang_def\n" if $opt_v > 2;
|
|
|
3170 |
} # 1}}}
|
|
|
3171 |
sub print_extension_info { # {{{1
|
|
|
3172 |
my ($extension,) = @_;
|
|
|
3173 |
if ($extension) { # show information on this extension
|
|
|
3174 |
foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
|
|
|
3175 |
# Language_by_Extension{f} = 'Fortran 77'
|
|
|
3176 |
printf "%-15s -> %s\n", $ext, $Language_by_Extension{$ext}
|
|
|
3177 |
if $ext =~ m{$extension}i;
|
|
|
3178 |
}
|
|
|
3179 |
} else { # show information on all extensions
|
|
|
3180 |
foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
|
|
|
3181 |
# Language_by_Extension{f} = 'Fortran 77'
|
|
|
3182 |
printf "%-15s -> %s\n", $ext, $Language_by_Extension{$ext};
|
|
|
3183 |
}
|
|
|
3184 |
}
|
|
|
3185 |
} # 1}}}
|
|
|
3186 |
sub print_language_info { # {{{1
|
|
|
3187 |
my ($language,
|
|
|
3188 |
$prefix ,) = @_;
|
|
|
3189 |
my %extensions = (); # the subset matched by the given $language value
|
|
|
3190 |
if ($language) { # show information on this language
|
|
|
3191 |
foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
|
|
|
3192 |
# Language_by_Extension{f} = 'Fortran 77'
|
|
|
3193 |
push @{$extensions{$Language_by_Extension{$ext}} }, $ext
|
|
|
3194 |
if lc $Language_by_Extension{$ext} eq lc $language;
|
|
|
3195 |
# if $Language_by_Extension{$ext} =~ m{$language}i;
|
|
|
3196 |
}
|
|
|
3197 |
} else { # show information on all languages
|
|
|
3198 |
foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
|
|
|
3199 |
# Language_by_Extension{f} = 'Fortran 77'
|
|
|
3200 |
push @{$extensions{$Language_by_Extension{$ext}} }, $ext
|
|
|
3201 |
}
|
|
|
3202 |
}
|
|
|
3203 |
|
|
|
3204 |
# add exceptions (one file extension mapping to multiple languages)
|
|
|
3205 |
if (!$language or
|
|
|
3206 |
$language =~ /^(Objective C|MATLAB|MUMPS|Mercury)$/i) {
|
|
|
3207 |
push @{$extensions{'Objective C'}}, "m";
|
|
|
3208 |
push @{$extensions{'MATLAB'}} , "m";
|
|
|
3209 |
push @{$extensions{'MUMPS'}} , "m";
|
|
|
3210 |
delete $extensions{'MATLAB/Objective C/MUMPS/Mercury'};
|
|
|
3211 |
} elsif ($language =~ /^(Lisp|OpenCL)$/i) {
|
|
|
3212 |
push @{$extensions{'Lisp'}} , "cl";
|
|
|
3213 |
push @{$extensions{'OpenCL'}}, "cl";
|
|
|
3214 |
delete $extensions{'Lisp/OpenCL'};
|
|
|
3215 |
} elsif ($language =~ /^(Lisp|Julia)$/i) {
|
|
|
3216 |
push @{$extensions{'Lisp'}} , "jl";
|
|
|
3217 |
push @{$extensions{'Julia'}} , "jl";
|
|
|
3218 |
delete $extensions{'Lisp/Julia'};
|
|
|
3219 |
} elsif ($language =~ /^(Perl|Prolog)$/i) {
|
|
|
3220 |
push @{$extensions{'Perl'}} , "pl";
|
|
|
3221 |
push @{$extensions{'Prolog'}}, "pl";
|
|
|
3222 |
delete $extensions{'Perl/Prolog'};
|
|
|
3223 |
} elsif ($language =~ /^(IDL|Qt Project|Prolog)$/i) {
|
|
|
3224 |
push @{$extensions{'IDL'}} , "pro";
|
|
|
3225 |
push @{$extensions{'Qt Project'}}, "pro";
|
|
|
3226 |
push @{$extensions{'Prolog'}} , "pro";
|
|
|
3227 |
delete $extensions{'IDL/Qt Project/Prolog'};
|
|
|
3228 |
} elsif ($language =~ /^(D|dtrace)$/i) {
|
|
|
3229 |
push @{$extensions{'D'}} , "d";
|
|
|
3230 |
push @{$extensions{'dtrace'}} , "d";
|
|
|
3231 |
delete $extensions{'D/dtrace'};
|
|
|
3232 |
} elsif ($language =~ /^(Ant)$/i) {
|
|
|
3233 |
push @{$extensions{'Ant'}} , "build.xml";
|
|
|
3234 |
delete $extensions{'Ant/XML'};
|
|
|
3235 |
}
|
|
|
3236 |
|
|
|
3237 |
if (%extensions) {
|
|
|
3238 |
foreach my $lang (sort {lc $a cmp lc $b } keys %extensions) {
|
|
|
3239 |
if ($prefix) {
|
|
|
3240 |
printf "%s %s\n", $prefix, join(", ", @{$extensions{$lang}});
|
|
|
3241 |
} else {
|
|
|
3242 |
printf "%-26s (%s)\n", $lang, join(", ", @{$extensions{$lang}});
|
|
|
3243 |
}
|
|
|
3244 |
}
|
|
|
3245 |
}
|
|
|
3246 |
} # 1}}}
|
|
|
3247 |
sub print_language_filters { # {{{1
|
|
|
3248 |
my ($language,) = @_;
|
|
|
3249 |
if (!@{$Filters_by_Language{$language}}) {
|
|
|
3250 |
warn "Unknown language: $language\n";
|
|
|
3251 |
warn "Use --show-lang to list all defined languages.\n";
|
|
|
3252 |
return;
|
|
|
3253 |
}
|
|
|
3254 |
printf "%s\n", $language;
|
|
|
3255 |
foreach my $filter (@{$Filters_by_Language{$language}}) {
|
|
|
3256 |
printf " filter %s", $filter->[0];
|
|
|
3257 |
printf " %s", $filter->[1] if defined $filter->[1];
|
|
|
3258 |
printf " %s", $filter->[2] if defined $filter->[2];
|
|
|
3259 |
print "\n";
|
|
|
3260 |
}
|
|
|
3261 |
print_language_info($language, " extensions:");
|
|
|
3262 |
} # 1}}}
|
|
|
3263 |
sub make_file_list { # {{{1
|
|
|
3264 |
my ($ra_arg_list, # in file and/or directory names to examine
|
|
|
3265 |
$rh_Err , # in hash of error codes
|
|
|
3266 |
$raa_errors , # out errors encountered
|
|
|
3267 |
$rh_ignored , # out files not recognized as computer languages
|
|
|
3268 |
) = @_;
|
|
|
3269 |
print "-> make_file_list(@{$ra_arg_list})\n" if $opt_v > 2;
|
|
|
3270 |
|
|
|
3271 |
my ($fh, $filename);
|
|
|
3272 |
if ($opt_categorized) {
|
|
|
3273 |
$filename = $opt_categorized;
|
|
|
3274 |
$fh = new IO::File $filename, "+>"; # open for read/write
|
|
|
3275 |
die "Unable to write to $filename: $!\n" unless defined $fh;
|
|
|
3276 |
} elsif ($opt_sdir) {
|
|
|
3277 |
# write to the user-defined scratch directory
|
|
|
3278 |
$filename = $opt_sdir . '/cloc_file_list.txt';
|
|
|
3279 |
$fh = new IO::File $filename, "+>"; # open for read/write
|
|
|
3280 |
die "Unable to write to $filename: $!\n" unless defined $fh;
|
|
|
3281 |
} else {
|
|
|
3282 |
# let File::Temp create a suitable temporary file
|
|
|
3283 |
($fh, $filename) = tempfile(UNLINK => 1); # delete file on exit
|
|
|
3284 |
print "Using temp file list [$filename]\n" if $opt_v;
|
|
|
3285 |
}
|
|
|
3286 |
|
|
|
3287 |
my @dir_list = ();
|
|
|
3288 |
foreach my $file_or_dir (@{$ra_arg_list}) {
|
|
|
3289 |
#print "make_file_list file_or_dir=$file_or_dir\n";
|
|
|
3290 |
my $size_in_bytes = 0;
|
|
|
3291 |
if (!-r $file_or_dir) {
|
|
|
3292 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file_or_dir];
|
|
|
3293 |
next;
|
|
|
3294 |
}
|
|
|
3295 |
if (is_file($file_or_dir)) {
|
|
|
3296 |
if (!(-s $file_or_dir)) { # 0 sized file, named pipe, socket
|
|
|
3297 |
$rh_ignored->{$file_or_dir} = 'zero sized file';
|
|
|
3298 |
next;
|
|
|
3299 |
} elsif (-B $file_or_dir and !$opt_read_binary_files) {
|
|
|
3300 |
# avoid binary files unless user insists on reading them
|
|
|
3301 |
if ($opt_unicode) {
|
|
|
3302 |
# only ignore if not a Unicode file w/trivial
|
|
|
3303 |
# ASCII transliteration
|
|
|
3304 |
if (!unicode_file($file_or_dir)) {
|
|
|
3305 |
$rh_ignored->{$file_or_dir} = 'binary file';
|
|
|
3306 |
next;
|
|
|
3307 |
}
|
|
|
3308 |
} else {
|
|
|
3309 |
$rh_ignored->{$file_or_dir} = 'binary file';
|
|
|
3310 |
next;
|
|
|
3311 |
}
|
|
|
3312 |
}
|
|
|
3313 |
push @file_list, "$file_or_dir";
|
|
|
3314 |
} elsif (is_dir($file_or_dir)) {
|
|
|
3315 |
push @dir_list, $file_or_dir;
|
|
|
3316 |
} else {
|
|
|
3317 |
push @{$raa_errors}, [$rh_Err->{'Neither file nor directory'} , $file_or_dir];
|
|
|
3318 |
$rh_ignored->{$file_or_dir} = 'not file, not directory';
|
|
|
3319 |
}
|
|
|
3320 |
}
|
|
|
3321 |
foreach my $dir (@dir_list) {
|
|
|
3322 |
#print "make_file_list dir=$dir\n";
|
|
|
3323 |
# populates global variable @file_list
|
|
|
3324 |
find({wanted => \&files ,
|
|
|
3325 |
preprocess => \&find_preprocessor,
|
|
|
3326 |
follow => $opt_follow_links }, $dir);
|
|
|
3327 |
}
|
|
|
3328 |
$nFiles_Found = scalar @file_list;
|
|
|
3329 |
printf "%8d text file%s.\n", plural_form($nFiles_Found) unless $opt_quiet;
|
|
|
3330 |
write_file($opt_found, sort @file_list) if $opt_found;
|
|
|
3331 |
|
|
|
3332 |
my $nFiles_Categorized = 0;
|
|
|
3333 |
foreach my $file (@file_list) {
|
|
|
3334 |
printf "classifying $file\n" if $opt_v > 2;
|
|
|
3335 |
|
|
|
3336 |
my $basename = basename $file;
|
|
|
3337 |
if ($Not_Code_Filename{$basename}) {
|
|
|
3338 |
$rh_ignored->{$file} = "listed in " . '$' .
|
|
|
3339 |
"Not_Code_Filename{$basename}";
|
|
|
3340 |
next;
|
|
|
3341 |
} elsif ($basename =~ m{~$}) {
|
|
|
3342 |
$rh_ignored->{$file} = "temporary editor file";
|
|
|
3343 |
next;
|
|
|
3344 |
}
|
|
|
3345 |
|
|
|
3346 |
my $size_in_bytes = (stat $file)[7];
|
| 5792 |
dpurdie |
3347 |
|
|
|
3348 |
# Vix extension
|
|
|
3349 |
# Perl under Windows can't handle pats longer that 255, so don't try
|
|
|
3350 |
# stat will return undef
|
|
|
3351 |
#
|
|
|
3352 |
if (! defined $size_in_bytes)
|
|
|
3353 |
{
|
|
|
3354 |
$rh_ignored->{$file} = "Windows path too long";
|
|
|
3355 |
next;
|
|
|
3356 |
}
|
|
|
3357 |
|
| 5282 |
dpurdie |
3358 |
my $language = "";
|
|
|
3359 |
if ($All_One_Language) {
|
|
|
3360 |
# user over-rode auto-language detection by using
|
|
|
3361 |
# --force-lang with just a language name (no extension)
|
|
|
3362 |
$language = $All_One_Language;
|
|
|
3363 |
} else {
|
|
|
3364 |
$language = classify_file($file ,
|
|
|
3365 |
$rh_Err ,
|
|
|
3366 |
$raa_errors,
|
|
|
3367 |
$rh_ignored);
|
|
|
3368 |
}
|
| 5792 |
dpurdie |
3369 |
|
| 5282 |
dpurdie |
3370 |
die "make_file_list($file) undef size" unless defined $size_in_bytes;
|
|
|
3371 |
die "make_file_list($file) undef lang" unless defined $language;
|
|
|
3372 |
printf $fh "%d,%s,%s\n", $size_in_bytes, $language, $file;
|
|
|
3373 |
++$nFiles_Categorized;
|
|
|
3374 |
#printf "classified %d files\n", $nFiles_Categorized
|
|
|
3375 |
# unless (!$opt_progress_rate or
|
|
|
3376 |
# ($nFiles_Categorized % $opt_progress_rate));
|
|
|
3377 |
}
|
|
|
3378 |
printf "classified %d files\r", $nFiles_Categorized
|
|
|
3379 |
if !$opt_quiet and $nFiles_Categorized > 1;
|
|
|
3380 |
|
|
|
3381 |
print "<- make_file_list()\n" if $opt_v > 2;
|
|
|
3382 |
|
|
|
3383 |
return $fh; # handle to the file containing the list of files to process
|
|
|
3384 |
} # 1}}}
|
|
|
3385 |
sub remove_duplicate_files { # {{{1
|
|
|
3386 |
my ($fh , # in
|
|
|
3387 |
$rh_Language , # out
|
|
|
3388 |
$rh_unique_source_file, # out
|
|
|
3389 |
$rh_Err , # in
|
|
|
3390 |
$raa_errors , # out errors encountered
|
|
|
3391 |
$rh_ignored , # out
|
|
|
3392 |
) = @_;
|
|
|
3393 |
|
|
|
3394 |
# Check for duplicate files by comparing file sizes.
|
|
|
3395 |
# Where files are equally sized, compare their MD5 checksums.
|
|
|
3396 |
print "-> remove_duplicate_files\n" if $opt_v > 2;
|
|
|
3397 |
|
|
|
3398 |
my $n = 0;
|
|
|
3399 |
my %files_by_size = (); # files_by_size{ # bytes } = [ list of files ]
|
|
|
3400 |
seek($fh, 0, 0); # rewind to beginning of the temp file
|
|
|
3401 |
while (<$fh>) {
|
|
|
3402 |
++$n;
|
|
|
3403 |
my ($size_in_bytes, $language, $file) = split(/,/, $_, 3);
|
|
|
3404 |
chomp($file);
|
|
|
3405 |
$rh_Language->{$file} = $language;
|
|
|
3406 |
push @{$files_by_size{$size_in_bytes}}, $file;
|
|
|
3407 |
if ($opt_skip_uniqueness) {
|
|
|
3408 |
$rh_unique_source_file->{$file} = 1;
|
|
|
3409 |
}
|
|
|
3410 |
}
|
|
|
3411 |
return if $opt_skip_uniqueness;
|
|
|
3412 |
if ($opt_progress_rate and ($n > $opt_progress_rate)) {
|
|
|
3413 |
printf "Duplicate file check %d files (%d known unique)\r",
|
|
|
3414 |
$n, scalar keys %files_by_size;
|
|
|
3415 |
}
|
|
|
3416 |
$n = 0;
|
|
|
3417 |
foreach my $bytes (sort {$a <=> $b} keys %files_by_size) {
|
|
|
3418 |
++$n;
|
|
|
3419 |
printf "Unique: %8d files \r",
|
|
|
3420 |
$n unless (!$opt_progress_rate or ($n % $opt_progress_rate));
|
|
|
3421 |
if (scalar @{$files_by_size{$bytes}} == 1) {
|
|
|
3422 |
# only one file is this big; must be unique
|
|
|
3423 |
$rh_unique_source_file->{$files_by_size{$bytes}[0]} = 1;
|
|
|
3424 |
next;
|
|
|
3425 |
} else {
|
|
|
3426 |
#print "equally sized files: ",join(", ", @{$files_by_size{$bytes}}), "\n";
|
|
|
3427 |
# Files in the list @{$files_by_size{$bytes} all are
|
|
|
3428 |
# $bytes long. Sort the list by file basename.
|
|
|
3429 |
|
|
|
3430 |
# # sorting on basename causes repeatability problems
|
|
|
3431 |
# # if the basename is not unique (eg "includeA/x.h"
|
|
|
3432 |
# # and "includeB/x.h". Instead, sort on full path.
|
|
|
3433 |
# # Ref bug #114.
|
|
|
3434 |
# my @sorted_bn = ();
|
|
|
3435 |
# my %BN = map { basename($_) => $_ } @{$files_by_size{$bytes}};
|
|
|
3436 |
# foreach my $F (sort keys %BN) {
|
|
|
3437 |
# push @sorted_bn, $BN{$F};
|
|
|
3438 |
# }
|
|
|
3439 |
|
|
|
3440 |
my @sorted_bn = sort @{$files_by_size{$bytes}};
|
|
|
3441 |
|
|
|
3442 |
foreach my $F (different_files(\@sorted_bn ,
|
|
|
3443 |
$rh_Err ,
|
|
|
3444 |
$raa_errors ,
|
|
|
3445 |
$rh_ignored ) ) {
|
|
|
3446 |
$rh_unique_source_file->{$F} = 1;
|
|
|
3447 |
}
|
|
|
3448 |
}
|
|
|
3449 |
}
|
|
|
3450 |
print "<- remove_duplicate_files\n" if $opt_v > 2;
|
|
|
3451 |
} # 1}}}
|
|
|
3452 |
sub find_preprocessor { # {{{1
|
|
|
3453 |
# invoked by File::Find's find()
|
|
|
3454 |
# Reads global variable %Exclude_Dir.
|
|
|
3455 |
# Populates global variable %Ignored.
|
|
|
3456 |
# Reject files/directories in cwd which are in the exclude list.
|
|
|
3457 |
|
|
|
3458 |
my @ok = ();
|
|
|
3459 |
foreach my $F_or_D (@_) { # pure file or directory name, no separators
|
|
|
3460 |
if ($Exclude_Dir{$F_or_D}) {
|
|
|
3461 |
$Ignored{$File::Find::name} = "--exclude-dir=$Exclude_Dir{$F_or_D}";
|
|
|
3462 |
} elsif (-d $F_or_D) {
|
|
|
3463 |
if ($opt_not_match_d and $F_or_D =~ m{$opt_not_match_d}) {
|
|
|
3464 |
$Ignored{$File::Find::name} = "--not-match-d=$opt_not_match_d";
|
|
|
3465 |
} else {
|
|
|
3466 |
push @ok, $F_or_D;
|
|
|
3467 |
}
|
|
|
3468 |
|
|
|
3469 |
} else {
|
|
|
3470 |
push @ok, $F_or_D;
|
|
|
3471 |
}
|
|
|
3472 |
}
|
|
|
3473 |
return @ok;
|
|
|
3474 |
} # 1}}}
|
|
|
3475 |
sub files { # {{{1
|
|
|
3476 |
# invoked by File::Find's find() Populates global variable @file_list.
|
|
|
3477 |
# See also find_preprocessor() which prunes undesired directories.
|
|
|
3478 |
|
|
|
3479 |
my $Dir = cwd(); # not $File::Find::dir which just gives relative path
|
|
|
3480 |
if ($opt_match_f ) { return unless /$opt_match_f/; }
|
|
|
3481 |
if ($opt_not_match_f) { return if /$opt_not_match_f/; }
|
|
|
3482 |
if ($opt_match_d ) { return unless $Dir =~ m{$opt_match_d} }
|
|
|
3483 |
|
|
|
3484 |
my $nBytes = -s $_ ;
|
|
|
3485 |
if (!$nBytes) {
|
|
|
3486 |
$Ignored{$File::Find::name} = 'zero sized file';
|
|
|
3487 |
printf "files(%s) zero size\n", $File::Find::name if $opt_v > 5;
|
|
|
3488 |
}
|
|
|
3489 |
return unless $nBytes ; # attempting other tests w/pipe or socket will hang
|
|
|
3490 |
if ($nBytes > $opt_max_file_size*1024**2) {
|
|
|
3491 |
$Ignored{$File::Find::name} = "file size of " .
|
|
|
3492 |
$nBytes/1024**2 . " MB exceeds max file size of " .
|
|
|
3493 |
"$opt_max_file_size MB";
|
|
|
3494 |
printf "file(%s) exceeds $opt_max_file_size MB\n",
|
|
|
3495 |
$File::Find::name if $opt_v > 5;
|
|
|
3496 |
return;
|
|
|
3497 |
}
|
|
|
3498 |
my $is_dir = is_dir($_);
|
|
|
3499 |
my $is_bin = -B $_ ;
|
|
|
3500 |
printf "files(%s) size=%d is_dir=%d -B=%d\n",
|
|
|
3501 |
$File::Find::name, $nBytes, $is_dir, $is_bin if $opt_v > 5;
|
|
|
3502 |
$is_bin = 0 if $opt_unicode and unicode_file($_);
|
|
|
3503 |
$is_bin = 0 if $opt_read_binary_files;
|
|
|
3504 |
return if $is_dir or $is_bin;
|
|
|
3505 |
++$nFiles_Found;
|
|
|
3506 |
printf "%8d files\r", $nFiles_Found
|
|
|
3507 |
unless (!$opt_progress_rate or ($nFiles_Found % $opt_progress_rate));
|
|
|
3508 |
push @file_list, $File::Find::name;
|
|
|
3509 |
} # 1}}}
|
|
|
3510 |
sub archive_files { # {{{1
|
|
|
3511 |
# invoked by File::Find's find() Populates global variable @binary_archive
|
|
|
3512 |
foreach my $ext (keys %Known_Binary_Archives) {
|
|
|
3513 |
push @binary_archive, $File::Find::name
|
|
|
3514 |
if $File::Find::name =~ m{$ext$};
|
|
|
3515 |
}
|
|
|
3516 |
} # 1}}}
|
|
|
3517 |
sub is_file { # {{{1
|
|
|
3518 |
# portable method to test if item is a file
|
|
|
3519 |
# (-f doesn't work in ActiveState Perl on Windows)
|
|
|
3520 |
my $item = shift @_;
|
|
|
3521 |
|
|
|
3522 |
if ($ON_WINDOWS) {
|
|
|
3523 |
my $mode = (stat $item)[2];
|
|
|
3524 |
$mode = 0 unless $mode;
|
|
|
3525 |
if ($mode & 0100000) { return 1; }
|
|
|
3526 |
else { return 0; }
|
|
|
3527 |
} else {
|
|
|
3528 |
return (-f $item); # works on Unix, Linux, CygWin, z/OS
|
|
|
3529 |
}
|
|
|
3530 |
} # 1}}}
|
|
|
3531 |
sub is_dir { # {{{1
|
|
|
3532 |
# portable method to test if item is a directory
|
|
|
3533 |
# (-d doesn't work in ActiveState Perl on Windows)
|
|
|
3534 |
my $item = shift @_;
|
|
|
3535 |
|
|
|
3536 |
if ($ON_WINDOWS) {
|
|
|
3537 |
my $mode = (stat $item)[2];
|
|
|
3538 |
$mode = 0 unless $mode;
|
|
|
3539 |
if ($mode & 0040000) { return 1; }
|
|
|
3540 |
else { return 0; }
|
|
|
3541 |
} else {
|
|
|
3542 |
return (-d $item); # works on Unix, Linux, CygWin, z/OS
|
|
|
3543 |
}
|
|
|
3544 |
} # 1}}}
|
|
|
3545 |
sub is_excluded { # {{{1
|
|
|
3546 |
my ($file , # in
|
|
|
3547 |
$excluded , # in hash of excluded directories
|
|
|
3548 |
) = @_;
|
|
|
3549 |
my($filename, $filepath, $suffix) = fileparse($file);
|
|
|
3550 |
foreach my $path (sort keys %{$excluded}) {
|
|
|
3551 |
return 1 if ($filepath =~ m{^$path/}i);
|
|
|
3552 |
}
|
|
|
3553 |
} # 1}}}
|
|
|
3554 |
sub classify_file { # {{{1
|
|
|
3555 |
my ($full_file , # in
|
|
|
3556 |
$rh_Err , # in hash of error codes
|
|
|
3557 |
$raa_errors , # out
|
|
|
3558 |
$rh_ignored , # out
|
|
|
3559 |
) = @_;
|
|
|
3560 |
|
|
|
3561 |
print "-> classify_file($full_file)\n" if $opt_v > 2;
|
|
|
3562 |
my $language = "(unknown)";
|
|
|
3563 |
|
|
|
3564 |
if (basename($full_file) eq "-" && defined $opt_stdin_name) {
|
|
|
3565 |
$full_file = $opt_stdin_name;
|
|
|
3566 |
}
|
|
|
3567 |
|
|
|
3568 |
my $look_at_first_line = 0;
|
|
|
3569 |
my $file = basename $full_file;
|
|
|
3570 |
if ($opt_autoconf and $file =~ /\.in$/) {
|
|
|
3571 |
$file =~ s/\.in$//;
|
|
|
3572 |
}
|
|
|
3573 |
return $language if $Not_Code_Filename{$file}; # (unknown)
|
|
|
3574 |
return $language if $file =~ m{~$}; # a temp edit file (unknown)
|
|
|
3575 |
if (defined $Language_by_File{$file}) {
|
|
|
3576 |
if ($Language_by_File{$file} eq "Ant/XML") {
|
|
|
3577 |
return Ant_or_XML( $full_file, $rh_Err, $raa_errors);
|
|
|
3578 |
} elsif ($Language_by_File{$file} eq "Maven/XML") {
|
|
|
3579 |
return Maven_or_XML($full_file, $rh_Err, $raa_errors);
|
|
|
3580 |
} else {
|
|
|
3581 |
return $Language_by_File{$file};
|
|
|
3582 |
}
|
|
|
3583 |
}
|
|
|
3584 |
|
|
|
3585 |
if ($file =~ /\.([^\.]+)$/) { # has an extension
|
|
|
3586 |
print "$full_file extension=[$1]\n" if $opt_v > 2;
|
|
|
3587 |
my $extension = $1;
|
|
|
3588 |
# Windows file names are case insensitive so map
|
|
|
3589 |
# all extensions to lowercase there.
|
|
|
3590 |
$extension = lc $extension if $ON_WINDOWS;
|
|
|
3591 |
my @extension_list = ( $extension );
|
|
|
3592 |
if ($file =~ /\.([^\.]+\.[^\.]+)$/) { # has a double extension
|
|
|
3593 |
my $extension = $1;
|
|
|
3594 |
$extension = lc $extension if $ON_WINDOWS;
|
|
|
3595 |
unshift @extension_list, $extension; # examine double ext first
|
|
|
3596 |
}
|
|
|
3597 |
foreach my $extension (@extension_list) {
|
|
|
3598 |
if ($Not_Code_Extension{$extension} and
|
|
|
3599 |
!$Forced_Extension{$extension}) {
|
|
|
3600 |
# If .1 (for example) is an extension that would ordinarily be
|
|
|
3601 |
# ignored but the user has insisted this be counted with the
|
|
|
3602 |
# --force-lang option, then go ahead and count it.
|
|
|
3603 |
$rh_ignored->{$full_file} =
|
|
|
3604 |
'listed in $Not_Code_Extension{' . $extension . '}';
|
|
|
3605 |
return $language;
|
|
|
3606 |
}
|
|
|
3607 |
if (defined $Language_by_Extension{$extension}) {
|
|
|
3608 |
if ($Language_by_Extension{$extension} eq
|
|
|
3609 |
'MATLAB/Objective C/MUMPS/Mercury') {
|
|
|
3610 |
my $lang_M_or_O = "";
|
|
|
3611 |
matlab_or_objective_C($full_file ,
|
|
|
3612 |
$rh_Err ,
|
|
|
3613 |
$raa_errors,
|
|
|
3614 |
\$lang_M_or_O);
|
|
|
3615 |
if ($lang_M_or_O) {
|
|
|
3616 |
return $lang_M_or_O;
|
|
|
3617 |
} else { # an error happened in matlab_or_objective_C()
|
|
|
3618 |
$rh_ignored->{$full_file} =
|
|
|
3619 |
'failure in matlab_or_objective_C()';
|
|
|
3620 |
return $language; # (unknown)
|
|
|
3621 |
}
|
|
|
3622 |
} elsif ($Language_by_Extension{$extension} eq 'PHP/Pascal') {
|
|
|
3623 |
if (really_is_php($full_file)) {
|
|
|
3624 |
return 'PHP';
|
|
|
3625 |
} elsif (really_is_incpascal($full_file)) {
|
|
|
3626 |
return 'Pascal';
|
|
|
3627 |
} else {
|
|
|
3628 |
return $language; # (unknown)
|
|
|
3629 |
}
|
|
|
3630 |
} elsif ($Language_by_Extension{$extension} eq 'Pascal/Puppet') {
|
|
|
3631 |
my $lang_Pasc_or_Pup = "";
|
|
|
3632 |
pascal_or_puppet( $full_file ,
|
|
|
3633 |
$rh_Err ,
|
|
|
3634 |
$raa_errors,
|
|
|
3635 |
\$lang_Pasc_or_Pup);
|
|
|
3636 |
if ($lang_Pasc_or_Pup) {
|
|
|
3637 |
return $lang_Pasc_or_Pup;
|
|
|
3638 |
} else { # an error happened in pascal_or_puppet()
|
|
|
3639 |
$rh_ignored->{$full_file} =
|
|
|
3640 |
'failure in pascal_or_puppet()';
|
|
|
3641 |
return $language; # (unknown)
|
|
|
3642 |
}
|
|
|
3643 |
} elsif ($Language_by_Extension{$extension} eq 'Lisp/OpenCL') {
|
|
|
3644 |
return Lisp_or_OpenCL($full_file, $rh_Err, $raa_errors);
|
|
|
3645 |
} elsif ($Language_by_Extension{$extension} eq 'Lisp/Julia') {
|
|
|
3646 |
return Lisp_or_Julia( $full_file, $rh_Err, $raa_errors);
|
|
|
3647 |
} elsif ($Language_by_Extension{$extension} eq 'Perl/Prolog') {
|
|
|
3648 |
return Perl_or_Prolog($full_file, $rh_Err, $raa_errors);
|
|
|
3649 |
} elsif ($Language_by_Extension{$extension} eq
|
|
|
3650 |
'IDL/Qt Project/Prolog') {
|
|
|
3651 |
return IDL_or_QtProject($full_file, $rh_Err, $raa_errors);
|
|
|
3652 |
} elsif ($Language_by_Extension{$extension} eq 'D/dtrace') {
|
|
|
3653 |
# is it D or an init.d shell script?
|
|
|
3654 |
my $a_script = really_is_D($full_file, $rh_Err, $raa_errors);
|
|
|
3655 |
if ($a_script) {
|
|
|
3656 |
# could be dtrace, sh, bash or anything one would
|
|
|
3657 |
# write an init.d script in
|
|
|
3658 |
if (defined $Language_by_Script{$a_script}) {
|
|
|
3659 |
return $Language_by_Script{$a_script};
|
|
|
3660 |
} else {
|
|
|
3661 |
$rh_ignored->{$full_file} =
|
|
|
3662 |
"Unrecognized script language, '$a_script'";
|
|
|
3663 |
}
|
|
|
3664 |
} else {
|
|
|
3665 |
return 'D';
|
|
|
3666 |
}
|
|
|
3667 |
} elsif ($Language_by_Extension{$extension} eq 'Smarty') {
|
|
|
3668 |
# Smarty extension .tpl is generic; make sure the
|
|
|
3669 |
# file at least roughly resembles PHP. Alternatively,
|
|
|
3670 |
# if the user forces the issue, do the count.
|
|
|
3671 |
my $force_smarty = 0;
|
|
|
3672 |
foreach (@opt_force_lang) {
|
|
|
3673 |
if (lc($_) eq "smarty,tpl") {
|
|
|
3674 |
$force_smarty = 1;
|
|
|
3675 |
last;
|
|
|
3676 |
}
|
|
|
3677 |
}
|
|
|
3678 |
if (really_is_php($full_file) or $force_smarty) {
|
|
|
3679 |
return 'Smarty';
|
|
|
3680 |
} else {
|
|
|
3681 |
return $language; # (unknown)
|
|
|
3682 |
}
|
|
|
3683 |
} else {
|
|
|
3684 |
return $Language_by_Extension{$extension};
|
|
|
3685 |
}
|
|
|
3686 |
} else { # has an unmapped file extension
|
|
|
3687 |
$look_at_first_line = 1;
|
|
|
3688 |
}
|
|
|
3689 |
}
|
|
|
3690 |
} elsif (defined $Language_by_File{lc $file}) {
|
|
|
3691 |
return $Language_by_File{lc $file};
|
|
|
3692 |
} elsif ($opt_lang_no_ext and
|
|
|
3693 |
defined $Filters_by_Language{$opt_lang_no_ext}) {
|
|
|
3694 |
return $opt_lang_no_ext;
|
|
|
3695 |
} else { # no file extension
|
|
|
3696 |
$look_at_first_line = 1;
|
|
|
3697 |
}
|
|
|
3698 |
|
|
|
3699 |
if ($look_at_first_line) {
|
|
|
3700 |
# maybe it is a shell/Perl/Python/Ruby/etc script that
|
|
|
3701 |
# starts with pound bang:
|
|
|
3702 |
# #!/usr/bin/perl
|
|
|
3703 |
# #!/usr/bin/env perl
|
|
|
3704 |
my $script_language = peek_at_first_line($full_file ,
|
|
|
3705 |
$rh_Err ,
|
|
|
3706 |
$raa_errors);
|
|
|
3707 |
if (!$script_language) {
|
|
|
3708 |
$rh_ignored->{$full_file} = "language unknown (#2)";
|
|
|
3709 |
# returns (unknown)
|
|
|
3710 |
}
|
|
|
3711 |
if (defined $Language_by_Script{$script_language}) {
|
|
|
3712 |
if (defined $Filters_by_Language{
|
|
|
3713 |
$Language_by_Script{$script_language}}) {
|
|
|
3714 |
$language = $Language_by_Script{$script_language};
|
|
|
3715 |
} else {
|
|
|
3716 |
$rh_ignored->{$full_file} =
|
|
|
3717 |
"undefined: Filters_by_Language{" .
|
|
|
3718 |
$Language_by_Script{$script_language} .
|
|
|
3719 |
"} for scripting language $script_language";
|
|
|
3720 |
# returns (unknown)
|
|
|
3721 |
}
|
|
|
3722 |
} else {
|
|
|
3723 |
$rh_ignored->{$full_file} = "language unknown (#3)";
|
|
|
3724 |
# returns (unknown)
|
|
|
3725 |
}
|
|
|
3726 |
}
|
|
|
3727 |
print "<- classify_file($full_file)\n" if $opt_v > 2;
|
|
|
3728 |
return $language;
|
|
|
3729 |
} # 1}}}
|
|
|
3730 |
sub peek_at_first_line { # {{{1
|
|
|
3731 |
my ($file , # in
|
|
|
3732 |
$rh_Err , # in hash of error codes
|
|
|
3733 |
$raa_errors , # out
|
|
|
3734 |
) = @_;
|
|
|
3735 |
|
|
|
3736 |
print "-> peek_at_first_line($file)\n" if $opt_v > 2;
|
|
|
3737 |
|
|
|
3738 |
my $script_language = "";
|
|
|
3739 |
if (!-r $file) {
|
|
|
3740 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
3741 |
return $script_language;
|
|
|
3742 |
}
|
|
|
3743 |
my $IN = new IO::File $file, "r";
|
|
|
3744 |
if (!defined $IN) {
|
|
|
3745 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
3746 |
print "<- peek_at_first_line($file)\n" if $opt_v > 2;
|
|
|
3747 |
return $script_language;
|
|
|
3748 |
}
|
|
|
3749 |
chomp(my $first_line = <$IN>);
|
|
|
3750 |
if (defined $first_line) {
|
|
|
3751 |
#print "peek_at_first_line of [$file] first_line=[$first_line]\n";
|
|
|
3752 |
if ($first_line =~ /^#\!\s*(\S.*?)$/) {
|
|
|
3753 |
#print "peek_at_first_line 1=[$1]\n";
|
|
|
3754 |
my @pound_bang = split(' ', $1);
|
|
|
3755 |
#print "peek_at_first_line basename 0=[", basename($pound_bang[0]), "]\n";
|
|
|
3756 |
if (basename($pound_bang[0]) eq "env" and
|
|
|
3757 |
scalar @pound_bang > 1) {
|
|
|
3758 |
$script_language = $pound_bang[1];
|
|
|
3759 |
#print "peek_at_first_line pound_bang A $pound_bang[1]\n";
|
|
|
3760 |
} else {
|
|
|
3761 |
$script_language = basename $pound_bang[0];
|
|
|
3762 |
#print "peek_at_first_line pound_bang B $script_language\n";
|
|
|
3763 |
}
|
|
|
3764 |
}
|
|
|
3765 |
}
|
|
|
3766 |
$IN->close;
|
|
|
3767 |
print "<- peek_at_first_line($file)\n" if $opt_v > 2;
|
|
|
3768 |
return $script_language;
|
|
|
3769 |
} # 1}}}
|
|
|
3770 |
sub different_files { # {{{1
|
|
|
3771 |
# See which of the given files are unique by computing each file's MD5
|
|
|
3772 |
# sum. Return the subset of files which are unique.
|
|
|
3773 |
my ($ra_files , # in
|
|
|
3774 |
$rh_Err , # in
|
|
|
3775 |
$raa_errors , # out
|
|
|
3776 |
$rh_ignored , # out
|
|
|
3777 |
) = @_;
|
|
|
3778 |
|
|
|
3779 |
print "-> different_files(@{$ra_files})\n" if $opt_v > 2;
|
|
|
3780 |
my %file_hash = (); # file_hash{md5 hash} = [ file1, file2, ... ]
|
|
|
3781 |
foreach my $F (@{$ra_files}) {
|
|
|
3782 |
next if is_dir($F); # needed for Windows
|
|
|
3783 |
my $IN = new IO::File $F, "r";
|
|
|
3784 |
if (!defined $IN) {
|
|
|
3785 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
|
|
|
3786 |
$rh_ignored->{$F} = 'cannot read';
|
|
|
3787 |
} else {
|
|
|
3788 |
if ($HAVE_Digest_MD5) {
|
|
|
3789 |
binmode $IN;
|
|
|
3790 |
my $MD5 = Digest::MD5->new->addfile($IN)->hexdigest;
|
|
|
3791 |
#print "$F, $MD5\n";
|
|
|
3792 |
push @{$file_hash{$MD5}}, $F;
|
|
|
3793 |
} else {
|
|
|
3794 |
# all files treated unique
|
|
|
3795 |
push @{$file_hash{$F}}, $F;
|
|
|
3796 |
}
|
|
|
3797 |
$IN->close;
|
|
|
3798 |
}
|
|
|
3799 |
}
|
|
|
3800 |
|
|
|
3801 |
# Loop over file sets having identical MD5 sums. Within
|
|
|
3802 |
# each set, pick the file that most resembles known source
|
|
|
3803 |
# code.
|
|
|
3804 |
my @unique = ();
|
|
|
3805 |
for my $md5 (sort keys %file_hash) {
|
|
|
3806 |
my $i_best = 0;
|
|
|
3807 |
for (my $i = 1; $i < scalar(@{$file_hash{$md5}}); $i++) {
|
|
|
3808 |
my $F = $file_hash{$md5}[$i];
|
|
|
3809 |
my (@nul_a, %nul_h);
|
|
|
3810 |
my $language = classify_file($F, $rh_Err,
|
|
|
3811 |
# don't save these errors; pointless
|
|
|
3812 |
\@nul_a, \%nul_h);
|
|
|
3813 |
$i_best = $i if $language ne "(unknown)";
|
|
|
3814 |
}
|
|
|
3815 |
# keep the best one found and identify the rest as ignored
|
|
|
3816 |
for (my $i = 0; $i < scalar(@{$file_hash{$md5}}); $i++) {
|
|
|
3817 |
if ($i == $i_best) {
|
|
|
3818 |
push @unique, $file_hash{$md5}[$i_best];
|
|
|
3819 |
} else {
|
|
|
3820 |
$rh_ignored->{$file_hash{$md5}[$i]} = "duplicate of " .
|
|
|
3821 |
$file_hash{$md5}[$i_best];
|
|
|
3822 |
}
|
|
|
3823 |
}
|
|
|
3824 |
|
|
|
3825 |
}
|
|
|
3826 |
print "<- different_files(@unique)\n" if $opt_v > 2;
|
|
|
3827 |
return @unique;
|
|
|
3828 |
} # 1}}}
|
|
|
3829 |
sub call_counter { # {{{1
|
|
|
3830 |
my ($file , # in
|
|
|
3831 |
$language , # in
|
|
|
3832 |
$ra_Errors, # out
|
|
|
3833 |
) = @_;
|
|
|
3834 |
|
|
|
3835 |
# Logic: pass the file through the following filters:
|
|
|
3836 |
# 1. remove blank lines
|
|
|
3837 |
# 2. remove comments using each filter defined for this language
|
|
|
3838 |
# (example: SQL has two, remove_starts_with(--) and
|
|
|
3839 |
# remove_c_comments() )
|
|
|
3840 |
# 3. compute comment lines as
|
|
|
3841 |
# total lines - blank lines - lines left over after all
|
|
|
3842 |
# comment filters have been applied
|
|
|
3843 |
|
|
|
3844 |
print "-> call_counter($file, $language)\n" if $opt_v > 2;
|
|
|
3845 |
#print "call_counter: ", Dumper(@routines), "\n";
|
|
|
3846 |
|
|
|
3847 |
my @lines = ();
|
|
|
3848 |
my $ascii = "";
|
|
|
3849 |
if (-B $file and $opt_unicode) {
|
|
|
3850 |
# was binary so must be unicode
|
|
|
3851 |
|
|
|
3852 |
$/ = undef;
|
|
|
3853 |
my $IN = new IO::File $file, "r";
|
|
|
3854 |
my $bin_text = <$IN>;
|
|
|
3855 |
$IN->close;
|
|
|
3856 |
$/ = "\n";
|
|
|
3857 |
|
|
|
3858 |
$ascii = unicode_to_ascii( $bin_text );
|
|
|
3859 |
@lines = split("\n", $ascii );
|
|
|
3860 |
foreach (@lines) { $_ = "$_\n"; }
|
|
|
3861 |
|
|
|
3862 |
} else {
|
|
|
3863 |
# regular text file
|
|
|
3864 |
@lines = read_file($file);
|
|
|
3865 |
$ascii = join('', @lines);
|
|
|
3866 |
}
|
|
|
3867 |
|
|
|
3868 |
my @original_lines = @lines;
|
|
|
3869 |
my $total_lines = scalar @lines;
|
|
|
3870 |
|
|
|
3871 |
print_lines($file, "Original file:", \@lines) if $opt_print_filter_stages;
|
|
|
3872 |
@lines = rm_blanks(\@lines, $language, \%EOL_Continuation_re); # remove blank lines
|
|
|
3873 |
my $blank_lines = $total_lines - scalar @lines;
|
|
|
3874 |
print_lines($file, "Blank lines removed:", \@lines)
|
|
|
3875 |
if $opt_print_filter_stages;
|
|
|
3876 |
|
|
|
3877 |
@lines = rm_comments(\@lines, $language, $file,
|
|
|
3878 |
\%EOL_Continuation_re);
|
|
|
3879 |
|
|
|
3880 |
my $comment_lines = $total_lines - $blank_lines - scalar @lines;
|
|
|
3881 |
if ($opt_strip_comments) {
|
|
|
3882 |
my $stripped_file = "";
|
|
|
3883 |
if ($opt_original_dir) {
|
|
|
3884 |
$stripped_file = $file . ".$opt_strip_comments";
|
|
|
3885 |
} else {
|
|
|
3886 |
$stripped_file = basename $file . ".$opt_strip_comments";
|
|
|
3887 |
}
|
|
|
3888 |
write_file($stripped_file, @lines);
|
|
|
3889 |
}
|
|
|
3890 |
if ($opt_html and !$opt_diff) {
|
|
|
3891 |
chomp(@original_lines); # includes blank lines, comments
|
|
|
3892 |
chomp(@lines); # no blank lines, no comments
|
|
|
3893 |
|
|
|
3894 |
my (@diff_L, @diff_R, %count);
|
|
|
3895 |
|
|
|
3896 |
# remove blank lines to get better quality diffs; count
|
|
|
3897 |
# blank lines separately
|
|
|
3898 |
my @original_lines_minus_white = ();
|
|
|
3899 |
# however must keep track of how many blank lines were removed and
|
|
|
3900 |
# where they were removed so that the HTML display can include it
|
|
|
3901 |
my %blank_line = ();
|
|
|
3902 |
my $insert_line = 0;
|
|
|
3903 |
foreach (@original_lines) {
|
|
|
3904 |
if (/^\s*$/) {
|
|
|
3905 |
++$count{blank}{same};
|
|
|
3906 |
++$blank_line{ $insert_line };
|
|
|
3907 |
} else {
|
|
|
3908 |
++$insert_line;
|
|
|
3909 |
push @original_lines_minus_white, $_;
|
|
|
3910 |
}
|
|
|
3911 |
}
|
|
|
3912 |
|
|
|
3913 |
array_diff( $file , # in
|
|
|
3914 |
\@original_lines_minus_white , # in
|
|
|
3915 |
\@lines , # in
|
|
|
3916 |
"comment" , # in
|
|
|
3917 |
\@diff_L, \@diff_R, , # out
|
|
|
3918 |
$ra_Errors); # in/out
|
|
|
3919 |
write_comments_to_html($file, \@diff_L, \@diff_R, \%blank_line);
|
|
|
3920 |
#print Dumper("count", \%count);
|
|
|
3921 |
}
|
|
|
3922 |
|
|
|
3923 |
print "<- call_counter($total_lines, $blank_lines, $comment_lines)\n"
|
|
|
3924 |
if $opt_v > 2;
|
|
|
3925 |
return ($total_lines, $blank_lines, $comment_lines);
|
|
|
3926 |
} # 1}}}
|
|
|
3927 |
sub windows_glob { # {{{1
|
|
|
3928 |
# Windows doesn't expand wildcards. Use code from Sean M. Burke's
|
|
|
3929 |
# Win32::Autoglob module to do this.
|
|
|
3930 |
return map {;
|
|
|
3931 |
( defined($_) and m/[\*\?]/ ) ? sort(glob($_)) : $_
|
|
|
3932 |
} @_;
|
|
|
3933 |
} # 1}}}
|
|
|
3934 |
sub write_file { # {{{1
|
|
|
3935 |
my ($file , # in
|
|
|
3936 |
@lines , # in
|
|
|
3937 |
) = @_;
|
|
|
3938 |
|
|
|
3939 |
#print "write_file 1 [$file]\n";
|
|
|
3940 |
# Do ~ expansion (by Tim LaBerge, fixes bug 2787984)
|
|
|
3941 |
my $preglob_filename = $file;
|
|
|
3942 |
#print "write_file 2 [$preglob_filename]\n";
|
|
|
3943 |
if ($ON_WINDOWS) {
|
|
|
3944 |
$file = (windows_glob($file))[0];
|
|
|
3945 |
} else {
|
|
|
3946 |
$file = File::Glob::glob($file);
|
|
|
3947 |
}
|
|
|
3948 |
#print "write_file 3 [$file]\n";
|
|
|
3949 |
$file = $preglob_filename unless $file;
|
|
|
3950 |
#print "write_file 4 [$file]\n";
|
|
|
3951 |
|
|
|
3952 |
print "-> write_file($file)\n" if $opt_v > 2;
|
|
|
3953 |
|
|
|
3954 |
# Create the destination directory if it doesn't already exist.
|
|
|
3955 |
my $abs_file_path = File::Spec->rel2abs( $file );
|
|
|
3956 |
my ($volume, $directories, $filename) = File::Spec->splitpath( $abs_file_path );
|
|
|
3957 |
mkpath($volume . $directories, 1, 0777);
|
|
|
3958 |
|
|
|
3959 |
my $OUT = new IO::File $file, "w";
|
|
|
3960 |
if (defined $OUT) {
|
|
|
3961 |
chomp(@lines);
|
|
|
3962 |
print $OUT join("\n", @lines), "\n";
|
|
|
3963 |
$OUT->close;
|
|
|
3964 |
} else {
|
|
|
3965 |
warn "Unable to write to $file\n";
|
|
|
3966 |
}
|
|
|
3967 |
print "Wrote $file" unless $opt_quiet;
|
|
|
3968 |
print ", $CLOC_XSL" if $opt_xsl and $opt_xsl eq $CLOC_XSL;
|
|
|
3969 |
print "\n" unless $opt_quiet;
|
|
|
3970 |
|
|
|
3971 |
print "<- write_file\n" if $opt_v > 2;
|
|
|
3972 |
} # 1}}}
|
|
|
3973 |
sub read_file { # {{{1
|
|
|
3974 |
my ($file, ) = @_;
|
|
|
3975 |
my %BoM = (
|
|
|
3976 |
"fe ff" => 2 ,
|
|
|
3977 |
"ff fe" => 2 ,
|
|
|
3978 |
"ef bb bf" => 3 ,
|
|
|
3979 |
"f7 64 4c" => 3 ,
|
|
|
3980 |
"0e fe ff" => 3 ,
|
|
|
3981 |
"fb ee 28" => 3 ,
|
|
|
3982 |
"00 00 fe ff" => 4 ,
|
|
|
3983 |
"ff fe 00 00" => 4 ,
|
|
|
3984 |
"2b 2f 76 38" => 4 ,
|
|
|
3985 |
"2b 2f 76 39" => 4 ,
|
|
|
3986 |
"2b 2f 76 2b" => 4 ,
|
|
|
3987 |
"2b 2f 76 2f" => 4 ,
|
|
|
3988 |
"dd 73 66 73" => 4 ,
|
|
|
3989 |
"84 31 95 33" => 4 ,
|
|
|
3990 |
"2b 2f 76 38 2d" => 5 ,
|
|
|
3991 |
);
|
|
|
3992 |
|
|
|
3993 |
print "-> read_file($file)\n" if $opt_v > 2;
|
|
|
3994 |
my @lines = ();
|
|
|
3995 |
my $IN = new IO::File $file, "r";
|
|
|
3996 |
if (defined $IN) {
|
|
|
3997 |
@lines = <$IN>;
|
|
|
3998 |
$IN->close;
|
|
|
3999 |
if ($lines[$#lines]) { # test necessary for zero content files
|
|
|
4000 |
# (superfluous?)
|
|
|
4001 |
# Some files don't end with a new line. Force this:
|
|
|
4002 |
$lines[$#lines] .= "\n" unless $lines[$#lines] =~ m/\n$/;
|
|
|
4003 |
}
|
|
|
4004 |
} else {
|
|
|
4005 |
warn "Unable to read $file\n";
|
|
|
4006 |
}
|
|
|
4007 |
|
|
|
4008 |
# Are first few characters of the file Unicode Byte Order
|
|
|
4009 |
# Marks (http://en.wikipedia.org/wiki/Byte_Order_Mark)?
|
|
|
4010 |
# If yes, remove them.
|
|
|
4011 |
if (@lines) {
|
|
|
4012 |
my @chrs = split('', $lines[0]);
|
|
|
4013 |
my $n_chrs = scalar @chrs;
|
|
|
4014 |
my ($n2, $n3, $n4, $n5) = ('', '', '', '');
|
|
|
4015 |
$n2 = sprintf("%x %x", map ord, @chrs[0,1]) if $n_chrs >= 2;
|
|
|
4016 |
$n3 = sprintf("%s %x", $n2, ord $chrs[2]) if $n_chrs >= 3;
|
|
|
4017 |
$n4 = sprintf("%s %x", $n3, ord $chrs[3]) if $n_chrs >= 4;
|
|
|
4018 |
$n5 = sprintf("%s %x", $n4, ord $chrs[4]) if $n_chrs >= 5;
|
|
|
4019 |
if (defined $BoM{$n2}) { $lines[0] = substr $lines[0], 2;
|
|
|
4020 |
} elsif (defined $BoM{$n3}) { $lines[0] = substr $lines[0], 3;
|
|
|
4021 |
} elsif (defined $BoM{$n4}) { $lines[0] = substr $lines[0], 4;
|
|
|
4022 |
} elsif (defined $BoM{$n5}) { $lines[0] = substr $lines[0], 5;
|
|
|
4023 |
}
|
|
|
4024 |
}
|
|
|
4025 |
|
|
|
4026 |
# Trim DOS line endings. This allows Windows files
|
|
|
4027 |
# to be diff'ed with Unix files without line endings
|
|
|
4028 |
# causing every line to differ.
|
|
|
4029 |
foreach (@lines) { s/\cM$// }
|
|
|
4030 |
|
|
|
4031 |
print "<- read_file\n" if $opt_v > 2;
|
|
|
4032 |
return @lines;
|
|
|
4033 |
} # 1}}}
|
|
|
4034 |
sub rm_blanks { # {{{1
|
|
|
4035 |
my ($ra_in ,
|
|
|
4036 |
$language ,
|
|
|
4037 |
$rh_EOL_continuation_re) = @_;
|
|
|
4038 |
print "-> rm_blanks(language=$language)\n" if $opt_v > 2;
|
|
|
4039 |
#print "rm_blanks: language = [$language]\n";
|
|
|
4040 |
my @out = ();
|
|
|
4041 |
if ($language eq "COBOL") {
|
|
|
4042 |
@out = remove_cobol_blanks($ra_in);
|
|
|
4043 |
} else {
|
|
|
4044 |
# removes blank lines
|
|
|
4045 |
if (defined $rh_EOL_continuation_re->{$language}) {
|
|
|
4046 |
@out = remove_matches_2re($ra_in, '^\s*$',
|
|
|
4047 |
$rh_EOL_continuation_re->{$language});
|
|
|
4048 |
} else {
|
|
|
4049 |
@out = remove_matches($ra_in, '^\s*$');
|
|
|
4050 |
}
|
|
|
4051 |
}
|
|
|
4052 |
|
|
|
4053 |
print "<- rm_blanks(language=$language)\n" if $opt_v > 2;
|
|
|
4054 |
return @out;
|
|
|
4055 |
} # 1}}}
|
|
|
4056 |
sub rm_comments { # {{{1
|
|
|
4057 |
my ($ra_lines , # in, must be free of blank lines
|
|
|
4058 |
$language , # in
|
|
|
4059 |
$file , # in (some language counters, eg Haskell, need
|
|
|
4060 |
# access to the original file)
|
|
|
4061 |
$rh_EOL_continuation_re , # in
|
|
|
4062 |
) = @_;
|
|
|
4063 |
print "-> rm_comments(file=$file)\n" if $opt_v > 2;
|
|
|
4064 |
my @routines = @{$Filters_by_Language{$language}};
|
|
|
4065 |
my @lines = @{$ra_lines};
|
|
|
4066 |
my @original_lines = @{$ra_lines};
|
|
|
4067 |
|
|
|
4068 |
if (!scalar @original_lines) {
|
|
|
4069 |
return @lines;
|
|
|
4070 |
}
|
|
|
4071 |
|
|
|
4072 |
foreach my $call_string (@routines) {
|
|
|
4073 |
my $subroutine = $call_string->[0];
|
|
|
4074 |
if (! defined &{$subroutine}) {
|
|
|
4075 |
warn "rm_comments undefined subroutine $subroutine for $file\n";
|
|
|
4076 |
next;
|
|
|
4077 |
}
|
|
|
4078 |
print "rm_comments file=$file sub=$subroutine\n" if $opt_v > 1;
|
|
|
4079 |
my @args = @{$call_string};
|
|
|
4080 |
shift @args; # drop the subroutine name
|
|
|
4081 |
if (@args and $args[0] eq '>filename<') {
|
|
|
4082 |
shift @args;
|
|
|
4083 |
unshift @args, $file;
|
|
|
4084 |
}
|
|
|
4085 |
|
|
|
4086 |
no strict 'refs';
|
|
|
4087 |
@lines = &{$subroutine}(\@lines, @args); # apply filter...
|
|
|
4088 |
|
|
|
4089 |
print_lines($file, "After $subroutine(@args)", \@lines)
|
|
|
4090 |
if $opt_print_filter_stages;
|
|
|
4091 |
# then remove blank lines which are created by comment removal
|
|
|
4092 |
if (defined $rh_EOL_continuation_re->{$language}) {
|
|
|
4093 |
@lines = remove_matches_2re(\@lines, '^\s*$',
|
|
|
4094 |
$rh_EOL_continuation_re->{$language});
|
|
|
4095 |
} else {
|
|
|
4096 |
@lines = remove_matches(\@lines, '^\s*$');
|
|
|
4097 |
}
|
|
|
4098 |
|
|
|
4099 |
print_lines($file, "post $subroutine(@args) blank cleanup:", \@lines)
|
|
|
4100 |
if $opt_print_filter_stages;
|
|
|
4101 |
}
|
|
|
4102 |
# Exception for scripting languages: treat the first #! line as code.
|
|
|
4103 |
# Will need to add it back in if it was removed earlier.
|
|
|
4104 |
if (defined $Script_Language{$language} and
|
|
|
4105 |
$original_lines[0] =~ /^#!/ and
|
|
|
4106 |
(scalar(@lines) == 0 or
|
|
|
4107 |
$lines[0] ne $original_lines[0])) {
|
|
|
4108 |
unshift @lines, $original_lines[0]; # add the first line back
|
|
|
4109 |
}
|
|
|
4110 |
|
|
|
4111 |
foreach (@lines) { chomp } # make sure no spurious newlines were added
|
|
|
4112 |
|
|
|
4113 |
print "<- rm_comments\n" if $opt_v > 2;
|
|
|
4114 |
return @lines;
|
|
|
4115 |
} # 1}}}
|
|
|
4116 |
sub remove_f77_comments { # {{{1
|
|
|
4117 |
my ($ra_lines, ) = @_;
|
|
|
4118 |
print "-> remove_f77_comments\n" if $opt_v > 2;
|
|
|
4119 |
|
|
|
4120 |
my @save_lines = ();
|
|
|
4121 |
foreach (@{$ra_lines}) {
|
|
|
4122 |
next if m{^[*cC]};
|
|
|
4123 |
next if m{^\s*!};
|
|
|
4124 |
push @save_lines, $_;
|
|
|
4125 |
}
|
|
|
4126 |
|
|
|
4127 |
print "<- remove_f77_comments\n" if $opt_v > 2;
|
|
|
4128 |
return @save_lines;
|
|
|
4129 |
} # 1}}}
|
|
|
4130 |
sub remove_f90_comments { # {{{1
|
|
|
4131 |
# derived from SLOCCount
|
|
|
4132 |
my ($ra_lines, ) = @_;
|
|
|
4133 |
print "-> remove_f90_comments\n" if $opt_v > 2;
|
|
|
4134 |
|
|
|
4135 |
my @save_lines = ();
|
|
|
4136 |
foreach (@{$ra_lines}) {
|
|
|
4137 |
# a comment is m/^\s*!/
|
|
|
4138 |
# an empty line is m/^\s*$/
|
|
|
4139 |
# a HPF statement is m/^\s*!hpf\$/i
|
|
|
4140 |
# an Open MP statement is m/^\s*!omp\$/i
|
|
|
4141 |
if (! m/^(\s*!|\s*$)/ || m/^\s*!(hpf|omp)\$/i) {
|
|
|
4142 |
push @save_lines, $_;
|
|
|
4143 |
}
|
|
|
4144 |
}
|
|
|
4145 |
|
|
|
4146 |
print "<- remove_f90_comments\n" if $opt_v > 2;
|
|
|
4147 |
return @save_lines;
|
|
|
4148 |
} # 1}}}
|
|
|
4149 |
sub remove_matches { # {{{1
|
|
|
4150 |
my ($ra_lines, # in
|
|
|
4151 |
$pattern , # in Perl regular expression (case insensitive)
|
|
|
4152 |
) = @_;
|
|
|
4153 |
print "-> remove_matches(pattern=$pattern)\n" if $opt_v > 2;
|
|
|
4154 |
|
|
|
4155 |
my @save_lines = ();
|
|
|
4156 |
foreach (@{$ra_lines}) {
|
|
|
4157 |
#chomp; print "remove_matches [$pattern] [$_]\n";
|
|
|
4158 |
next if m{$pattern}i;
|
|
|
4159 |
push @save_lines, $_;
|
|
|
4160 |
}
|
|
|
4161 |
|
|
|
4162 |
print "<- remove_matches\n" if $opt_v > 2;
|
|
|
4163 |
#print "remove_matches returning\n ", join("\n ", @save_lines), "\n";
|
|
|
4164 |
return @save_lines;
|
|
|
4165 |
} # 1}}}
|
|
|
4166 |
sub remove_matches_2re { # {{{1
|
|
|
4167 |
my ($ra_lines, # in
|
|
|
4168 |
$pattern1, # in Perl regex 1 (case insensitive) to match
|
|
|
4169 |
$pattern2, # in Perl regex 2 (case insensitive) to not match prev line
|
|
|
4170 |
) = @_;
|
|
|
4171 |
print "-> remove_matches_2re(pattern=$pattern1,$pattern2)\n" if $opt_v > 2;
|
|
|
4172 |
|
|
|
4173 |
my @save_lines = ();
|
|
|
4174 |
for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
|
|
|
4175 |
# chomp($ra_lines->[$i]);
|
|
|
4176 |
#print "remove_matches_2re [$pattern1] [$pattern2] [$ra_lines->[$i]]\n";
|
|
|
4177 |
if ($i) {
|
|
|
4178 |
#print "remove_matches_2re prev=[$ra_lines->[$i-1]] this=[$ra_lines->[$i]]\n";
|
|
|
4179 |
next if ($ra_lines->[$i] =~ m{$pattern1}i) and
|
|
|
4180 |
($ra_lines->[$i-1] !~ m{$pattern2}i);
|
|
|
4181 |
} else {
|
|
|
4182 |
# on first line
|
|
|
4183 |
next if $ra_lines->[$i] =~ m{$pattern1}i;
|
|
|
4184 |
}
|
|
|
4185 |
push @save_lines, $ra_lines->[$i];
|
|
|
4186 |
}
|
|
|
4187 |
|
|
|
4188 |
print "<- remove_matches_2re\n" if $opt_v > 2;
|
|
|
4189 |
#print "remove_matches_2re returning\n ", join("\n ", @save_lines), "\n";
|
|
|
4190 |
return @save_lines;
|
|
|
4191 |
} # 1}}}
|
|
|
4192 |
sub remove_inline { # {{{1
|
|
|
4193 |
my ($ra_lines, # in
|
|
|
4194 |
$pattern , # in Perl regular expression (case insensitive)
|
|
|
4195 |
) = @_;
|
|
|
4196 |
print "-> remove_inline(pattern=$pattern)\n" if $opt_v > 2;
|
|
|
4197 |
|
|
|
4198 |
my @save_lines = ();
|
|
|
4199 |
unless ($opt_inline) {
|
|
|
4200 |
return @{$ra_lines};
|
|
|
4201 |
}
|
|
|
4202 |
my $nLines_affected = 0;
|
|
|
4203 |
foreach (@{$ra_lines}) {
|
|
|
4204 |
#chomp; print "remove_inline [$pattern] [$_]\n";
|
|
|
4205 |
if (m{$pattern}i) {
|
|
|
4206 |
++$nLines_affected;
|
|
|
4207 |
s{$pattern}{}i;
|
|
|
4208 |
}
|
|
|
4209 |
push @save_lines, $_;
|
|
|
4210 |
}
|
|
|
4211 |
|
|
|
4212 |
print "<- remove_inline\n" if $opt_v > 2;
|
|
|
4213 |
#print "remove_inline returning\n ", join("\n ", @save_lines), "\n";
|
|
|
4214 |
return @save_lines;
|
|
|
4215 |
} # 1}}}
|
|
|
4216 |
sub remove_above { # {{{1
|
|
|
4217 |
my ($ra_lines, $marker, ) = @_;
|
|
|
4218 |
print "-> remove_above(marker=$marker)\n" if $opt_v > 2;
|
|
|
4219 |
|
|
|
4220 |
# Make two passes through the code:
|
|
|
4221 |
# 1. check if the marker exists
|
|
|
4222 |
# 2. remove anything above the marker if it exists,
|
|
|
4223 |
# do nothing if the marker does not exist
|
|
|
4224 |
|
|
|
4225 |
# Pass 1
|
|
|
4226 |
my $found_marker = 0;
|
|
|
4227 |
for (my $line_number = 1;
|
|
|
4228 |
$line_number <= scalar @{$ra_lines};
|
|
|
4229 |
$line_number++) {
|
|
|
4230 |
if ($ra_lines->[$line_number-1] =~ m{$marker}) {
|
|
|
4231 |
$found_marker = $line_number;
|
|
|
4232 |
last;
|
|
|
4233 |
}
|
|
|
4234 |
}
|
|
|
4235 |
|
|
|
4236 |
# Pass 2 only if needed
|
|
|
4237 |
my @save_lines = ();
|
|
|
4238 |
if ($found_marker) {
|
|
|
4239 |
my $n = 1;
|
|
|
4240 |
foreach (@{$ra_lines}) {
|
|
|
4241 |
push @save_lines, $_
|
|
|
4242 |
if $n >= $found_marker;
|
|
|
4243 |
++$n;
|
|
|
4244 |
}
|
|
|
4245 |
} else { # marker wasn't found; save all lines
|
|
|
4246 |
foreach (@{$ra_lines}) {
|
|
|
4247 |
push @save_lines, $_;
|
|
|
4248 |
}
|
|
|
4249 |
}
|
|
|
4250 |
|
|
|
4251 |
print "<- remove_above\n" if $opt_v > 2;
|
|
|
4252 |
return @save_lines;
|
|
|
4253 |
} # 1}}}
|
|
|
4254 |
sub remove_below { # {{{1
|
|
|
4255 |
my ($ra_lines, $marker, ) = @_;
|
|
|
4256 |
print "-> remove_below(marker=$marker)\n" if $opt_v > 2;
|
|
|
4257 |
|
|
|
4258 |
my @save_lines = ();
|
|
|
4259 |
foreach (@{$ra_lines}) {
|
|
|
4260 |
last if m{$marker};
|
|
|
4261 |
push @save_lines, $_;
|
|
|
4262 |
}
|
|
|
4263 |
|
|
|
4264 |
print "<- remove_below\n" if $opt_v > 2;
|
|
|
4265 |
return @save_lines;
|
|
|
4266 |
} # 1}}}
|
|
|
4267 |
sub remove_below_above { # {{{1
|
|
|
4268 |
my ($ra_lines, $marker_below, $marker_above, ) = @_;
|
|
|
4269 |
# delete lines delimited by start and end line markers such
|
|
|
4270 |
# as Perl POD documentation
|
|
|
4271 |
print "-> remove_below_above(markerB=$marker_below, A=$marker_above)\n"
|
|
|
4272 |
if $opt_v > 2;
|
|
|
4273 |
|
|
|
4274 |
my @save_lines = ();
|
|
|
4275 |
my $between = 0;
|
|
|
4276 |
foreach (@{$ra_lines}) {
|
|
|
4277 |
if (!$between and m{$marker_below}) {
|
|
|
4278 |
$between = 1;
|
|
|
4279 |
next;
|
|
|
4280 |
}
|
|
|
4281 |
if ($between and m{$marker_above}) {
|
|
|
4282 |
$between = 0;
|
|
|
4283 |
next;
|
|
|
4284 |
}
|
|
|
4285 |
next if $between;
|
|
|
4286 |
push @save_lines, $_;
|
|
|
4287 |
}
|
|
|
4288 |
|
|
|
4289 |
print "<- remove_below_above\n" if $opt_v > 2;
|
|
|
4290 |
return @save_lines;
|
|
|
4291 |
} # 1}}}
|
|
|
4292 |
sub remove_between { # {{{1
|
|
|
4293 |
my ($ra_lines, $marker, ) = @_;
|
|
|
4294 |
# $marker must contain one of the balanced pairs understood
|
|
|
4295 |
# by Regexp::Common::balanced, namely
|
|
|
4296 |
# '{}' '()' '[]' or '<>'
|
|
|
4297 |
|
|
|
4298 |
print "-> remove_between(marker=$marker)\n" if $opt_v > 2;
|
|
|
4299 |
my %acceptable = ('{}'=>1, '()'=>1, '[]'=>1, '<>'=>1, );
|
|
|
4300 |
die "remove_between: invalid delimiter '$marker'\n",
|
|
|
4301 |
"the delimiter must be one of these four pairs:\n",
|
|
|
4302 |
"{} () [] <>\n" unless
|
|
|
4303 |
$acceptable{$marker};
|
|
|
4304 |
|
|
|
4305 |
Install_Regexp_Common() unless $HAVE_Rexexp_Common;
|
|
|
4306 |
|
|
|
4307 |
my $all_lines = join("", @{$ra_lines});
|
|
|
4308 |
|
|
|
4309 |
no strict 'vars';
|
|
|
4310 |
# otherwise get:
|
|
|
4311 |
# Global symbol "%RE" requires explicit package name at cloc line xx.
|
|
|
4312 |
if ($all_lines =~ m/$RE{balanced}{-parens => $marker}/) {
|
|
|
4313 |
no warnings;
|
|
|
4314 |
$all_lines =~ s/$1//g;
|
|
|
4315 |
}
|
|
|
4316 |
|
|
|
4317 |
print "<- remove_between\n" if $opt_v > 2;
|
|
|
4318 |
return split("\n", $all_lines);
|
|
|
4319 |
} # 1}}}
|
|
|
4320 |
sub remove_between_general { # {{{1
|
|
|
4321 |
my ($ra_lines, $start_marker, $end_marker, ) = @_;
|
|
|
4322 |
# Start and end markers may be any length strings.
|
|
|
4323 |
|
|
|
4324 |
print "-> remove_between_general(start=$start_marker, end=$end_marker)\n"
|
|
|
4325 |
if $opt_v > 2;
|
|
|
4326 |
|
|
|
4327 |
my $all_lines = join("", @{$ra_lines});
|
|
|
4328 |
|
|
|
4329 |
my @save_lines = ();
|
|
|
4330 |
my $in_comment = 0;
|
|
|
4331 |
foreach (@{$ra_lines}) {
|
|
|
4332 |
|
|
|
4333 |
next if /^\s*$/;
|
|
|
4334 |
s/\Q$start_marker\E.*?\Q$end_marker\E//g; # strip one-line comments
|
|
|
4335 |
next if /^\s*$/;
|
|
|
4336 |
if ($in_comment) {
|
|
|
4337 |
if (/\Q$end_marker\E/) {
|
|
|
4338 |
s/^.*?\Q$end_marker\E//;
|
|
|
4339 |
$in_comment = 0;
|
|
|
4340 |
}
|
|
|
4341 |
next if $in_comment;
|
|
|
4342 |
}
|
|
|
4343 |
next if /^\s*$/;
|
|
|
4344 |
$in_comment = 1 if /^(.*?)\Q$start_marker\E/; # $1 may be blank or code
|
|
|
4345 |
next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
|
|
|
4346 |
if ($in_comment) {
|
|
|
4347 |
# part code, part comment; strip the comment and keep the code
|
|
|
4348 |
s/^(.*?)\Q$start_marker\E.*$/$1/;
|
|
|
4349 |
}
|
|
|
4350 |
push @save_lines, $_;
|
|
|
4351 |
}
|
|
|
4352 |
|
|
|
4353 |
print "<- remove_between_general\n" if $opt_v > 2;
|
|
|
4354 |
return @save_lines;
|
|
|
4355 |
} # 1}}}
|
|
|
4356 |
sub remove_between_regex { # {{{1
|
|
|
4357 |
my ($ra_lines, $start_RE, $end_RE, ) = @_;
|
|
|
4358 |
# Start and end regex's may be any length strings.
|
|
|
4359 |
|
|
|
4360 |
print "-> remove_between_regex(start=$start_RE, end=$end_RE)\n"
|
|
|
4361 |
if $opt_v > 2;
|
|
|
4362 |
|
|
|
4363 |
my $all_lines = join("", @{$ra_lines});
|
|
|
4364 |
|
|
|
4365 |
my @save_lines = ();
|
|
|
4366 |
my $in_comment = 0;
|
|
|
4367 |
foreach (@{$ra_lines}) {
|
|
|
4368 |
|
|
|
4369 |
next if /^\s*$/;
|
|
|
4370 |
s/${start_RE}.*?${end_RE}//g; # strip one-line comments
|
|
|
4371 |
next if /^\s*$/;
|
|
|
4372 |
if ($in_comment) {
|
|
|
4373 |
if (/$end_RE/) {
|
|
|
4374 |
s/^.*?${end_RE}//;
|
|
|
4375 |
$in_comment = 0;
|
|
|
4376 |
}
|
|
|
4377 |
next if $in_comment;
|
|
|
4378 |
}
|
|
|
4379 |
next if /^\s*$/;
|
|
|
4380 |
$in_comment = 1 if /^(.*?)${start_RE}/; # $1 may be blank or code
|
|
|
4381 |
next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
|
|
|
4382 |
if ($in_comment) {
|
|
|
4383 |
# part code, part comment; strip the comment and keep the code
|
|
|
4384 |
s/^(.*?)${start_RE}.*$/$1/;
|
|
|
4385 |
}
|
|
|
4386 |
push @save_lines, $_;
|
|
|
4387 |
}
|
|
|
4388 |
|
|
|
4389 |
print "<- remove_between_regex\n" if $opt_v > 2;
|
|
|
4390 |
return @save_lines;
|
|
|
4391 |
} # 1}}}
|
|
|
4392 |
sub remove_cobol_blanks { # {{{1
|
|
|
4393 |
# subroutines derived from SLOCCount
|
|
|
4394 |
my ($ra_lines, ) = @_;
|
|
|
4395 |
|
|
|
4396 |
my $free_format = 0; # Support "free format" source code.
|
|
|
4397 |
my @save_lines = ();
|
|
|
4398 |
|
|
|
4399 |
foreach (@{$ra_lines}) {
|
|
|
4400 |
next if m/^\s*$/;
|
|
|
4401 |
my $line = expand($_); # convert tabs to equivalent spaces
|
|
|
4402 |
$free_format = 1 if $line =~ m/^......\$.*SET.*SOURCEFORMAT.*FREE/i;
|
|
|
4403 |
if ($free_format) {
|
|
|
4404 |
push @save_lines, $_;
|
|
|
4405 |
} else {
|
|
|
4406 |
# Greg Toth:
|
|
|
4407 |
# (1) Treat lines with any alphanum in cols 1-6 and
|
|
|
4408 |
# blanks in cols 7 through 71 as blank line, and
|
|
|
4409 |
# (2) Treat lines with any alphanum in cols 1-6 and
|
|
|
4410 |
# slash (/) in col 7 as blank line (this is a
|
|
|
4411 |
# page eject directive).
|
|
|
4412 |
push @save_lines, $_ unless m/^\d{6}\s*$/ or
|
|
|
4413 |
($line =~ m/^.{6}\s{66}/) or
|
|
|
4414 |
($line =~ m/^......\//);
|
|
|
4415 |
}
|
|
|
4416 |
}
|
|
|
4417 |
return @save_lines;
|
|
|
4418 |
} # 1}}}
|
|
|
4419 |
sub remove_cobol_comments { # {{{1
|
|
|
4420 |
# subroutines derived from SLOCCount
|
|
|
4421 |
my ($ra_lines, ) = @_;
|
|
|
4422 |
|
|
|
4423 |
my $free_format = 0; # Support "free format" source code.
|
|
|
4424 |
my @save_lines = ();
|
|
|
4425 |
|
|
|
4426 |
foreach (@{$ra_lines}) {
|
|
|
4427 |
if (m/^......\$.*SET.*SOURCEFORMAT.*FREE/i) {$free_format = 1;}
|
|
|
4428 |
if ($free_format) {
|
|
|
4429 |
push @save_lines, $_ unless m{^\s*\*};
|
|
|
4430 |
} else {
|
|
|
4431 |
push @save_lines, $_ unless m{^......\*} or m{^\*};
|
|
|
4432 |
}
|
|
|
4433 |
}
|
|
|
4434 |
return @save_lines;
|
|
|
4435 |
} # 1}}}
|
|
|
4436 |
sub remove_jcl_comments { # {{{1
|
|
|
4437 |
my ($ra_lines, ) = @_;
|
|
|
4438 |
|
|
|
4439 |
print "-> remove_jcl_comments\n" if $opt_v > 2;
|
|
|
4440 |
|
|
|
4441 |
my @save_lines = ();
|
|
|
4442 |
my $in_comment = 0;
|
|
|
4443 |
foreach (@{$ra_lines}) {
|
|
|
4444 |
next if /^\s*$/;
|
|
|
4445 |
next if m{^\s*//\*};
|
|
|
4446 |
last if m{^\s*//\s*$};
|
|
|
4447 |
push @save_lines, $_;
|
|
|
4448 |
}
|
|
|
4449 |
|
|
|
4450 |
print "<- remove_jcl_comments\n" if $opt_v > 2;
|
|
|
4451 |
return @save_lines;
|
|
|
4452 |
} # 1}}}
|
|
|
4453 |
sub remove_jsp_comments { # {{{1
|
|
|
4454 |
# JSP comment is <%-- body of comment --%>
|
|
|
4455 |
my ($ra_lines, ) = @_;
|
|
|
4456 |
|
|
|
4457 |
print "-> remove_jsp_comments\n" if $opt_v > 2;
|
|
|
4458 |
|
|
|
4459 |
my @save_lines = ();
|
|
|
4460 |
my $in_comment = 0;
|
|
|
4461 |
foreach (@{$ra_lines}) {
|
|
|
4462 |
|
|
|
4463 |
next if /^\s*$/;
|
|
|
4464 |
s/<\%\-\-.*?\-\-\%>//g; # strip one-line comments
|
|
|
4465 |
next if /^\s*$/;
|
|
|
4466 |
if ($in_comment) {
|
|
|
4467 |
if (/\-\-\%>/) {
|
|
|
4468 |
s/^.*?\-\-\%>//;
|
|
|
4469 |
$in_comment = 0;
|
|
|
4470 |
}
|
|
|
4471 |
}
|
|
|
4472 |
next if /^\s*$/;
|
|
|
4473 |
$in_comment = 1 if /^(.*?)<\%\-\-/;
|
|
|
4474 |
next if defined $1 and $1 =~ /^\s*$/;
|
|
|
4475 |
next if ($in_comment);
|
|
|
4476 |
push @save_lines, $_;
|
|
|
4477 |
}
|
|
|
4478 |
|
|
|
4479 |
print "<- remove_jsp_comments\n" if $opt_v > 2;
|
|
|
4480 |
return @save_lines;
|
|
|
4481 |
} # 1}}}
|
|
|
4482 |
sub remove_html_comments { # {{{1
|
|
|
4483 |
# HTML comment is <!-- body of comment -->
|
|
|
4484 |
# Need to use my own routine until the HTML comment regex in
|
|
|
4485 |
# the Regexp::Common module can handle <!-- -- -->
|
|
|
4486 |
my ($ra_lines, ) = @_;
|
|
|
4487 |
|
|
|
4488 |
print "-> remove_html_comments\n" if $opt_v > 2;
|
|
|
4489 |
|
|
|
4490 |
my @save_lines = ();
|
|
|
4491 |
my $in_comment = 0;
|
|
|
4492 |
foreach (@{$ra_lines}) {
|
|
|
4493 |
|
|
|
4494 |
next if /^\s*$/;
|
|
|
4495 |
s/<!\-\-.*?\-\->//g; # strip one-line comments
|
|
|
4496 |
next if /^\s*$/;
|
|
|
4497 |
if ($in_comment) {
|
|
|
4498 |
if (/\-\->/) {
|
|
|
4499 |
s/^.*?\-\->//;
|
|
|
4500 |
$in_comment = 0;
|
|
|
4501 |
}
|
|
|
4502 |
}
|
|
|
4503 |
next if /^\s*$/;
|
|
|
4504 |
$in_comment = 1 if /^(.*?)<!\-\-/;
|
|
|
4505 |
next if defined $1 and $1 =~ /^\s*$/;
|
|
|
4506 |
next if ($in_comment);
|
|
|
4507 |
push @save_lines, $_;
|
|
|
4508 |
}
|
|
|
4509 |
|
|
|
4510 |
print "<- remove_html_comments\n" if $opt_v > 2;
|
|
|
4511 |
return @save_lines;
|
|
|
4512 |
} # 1}}}
|
|
|
4513 |
sub remove_haml_block { # {{{1
|
|
|
4514 |
# Haml block comments are defined by a silent comment marker like
|
|
|
4515 |
# /
|
|
|
4516 |
# or
|
|
|
4517 |
# -#
|
|
|
4518 |
# followed by indented text on subsequent lines.
|
|
|
4519 |
# http://haml.info/docs/yardoc/file.REFERENCE.html#comments
|
|
|
4520 |
my ($ra_lines, ) = @_;
|
|
|
4521 |
|
|
|
4522 |
print "-> remove_haml_block\n" if $opt_v > 2;
|
|
|
4523 |
|
|
|
4524 |
my @save_lines = ();
|
|
|
4525 |
my $in_comment = 0;
|
|
|
4526 |
foreach (@{$ra_lines}) {
|
|
|
4527 |
|
|
|
4528 |
next if /^\s*$/;
|
|
|
4529 |
my $line = expand($_); # convert tabs to equivalent spaces
|
|
|
4530 |
if ($in_comment) {
|
|
|
4531 |
$line =~ /^(\s*)/;
|
|
|
4532 |
# print "indent=", length $1, "\n";
|
|
|
4533 |
if (length $1 < $in_comment) {
|
|
|
4534 |
# indent level is less than comment level
|
|
|
4535 |
# are back in code
|
|
|
4536 |
$in_comment = 0;
|
|
|
4537 |
} else {
|
|
|
4538 |
# still in comments, don't use this line
|
|
|
4539 |
next;
|
|
|
4540 |
}
|
|
|
4541 |
} elsif ($line =~ m{^(\s*)(/|-#)\s*$}) {
|
|
|
4542 |
if ($1) {
|
|
|
4543 |
$in_comment = length $1 + 1; # number of leading spaces + 1
|
|
|
4544 |
} else {
|
|
|
4545 |
$in_comment = 1;
|
|
|
4546 |
}
|
|
|
4547 |
# print "in_comment=$in_comment\n";
|
|
|
4548 |
next;
|
|
|
4549 |
}
|
|
|
4550 |
push @save_lines, $line;
|
|
|
4551 |
}
|
|
|
4552 |
|
|
|
4553 |
print "<- remove_haml_block\n" if $opt_v > 2;
|
|
|
4554 |
return @save_lines;
|
|
|
4555 |
} # 1}}}
|
|
|
4556 |
sub add_newlines { # {{{1
|
|
|
4557 |
my ($ra_lines, ) = @_;
|
|
|
4558 |
print "-> add_newlines \n" if $opt_v > 2;
|
|
|
4559 |
|
|
|
4560 |
my @save_lines = ();
|
|
|
4561 |
foreach (@{$ra_lines}) {
|
|
|
4562 |
|
|
|
4563 |
push @save_lines, "$_\n";
|
|
|
4564 |
}
|
|
|
4565 |
|
|
|
4566 |
print "<- add_newlines \n" if $opt_v > 2;
|
|
|
4567 |
return @save_lines;
|
|
|
4568 |
} # 1}}}
|
|
|
4569 |
sub docstring_to_C { # {{{1
|
|
|
4570 |
my ($ra_lines, ) = @_;
|
|
|
4571 |
# Converts Python docstrings to C comments.
|
|
|
4572 |
|
|
|
4573 |
print "-> docstring_to_C()\n" if $opt_v > 2;
|
|
|
4574 |
|
|
|
4575 |
my $in_docstring = 0;
|
|
|
4576 |
foreach (@{$ra_lines}) {
|
|
|
4577 |
while (/"""/) {
|
|
|
4578 |
if (!$in_docstring) {
|
|
|
4579 |
s{[uU]?"""}{/*};
|
|
|
4580 |
$in_docstring = 1;
|
|
|
4581 |
} else {
|
|
|
4582 |
s{"""}{*/};
|
|
|
4583 |
$in_docstring = 0;
|
|
|
4584 |
}
|
|
|
4585 |
}
|
|
|
4586 |
}
|
|
|
4587 |
|
|
|
4588 |
print "<- docstring_to_C\n" if $opt_v > 2;
|
|
|
4589 |
return @{$ra_lines};
|
|
|
4590 |
} # 1}}}
|
|
|
4591 |
sub powershell_to_C { # {{{1
|
|
|
4592 |
my ($ra_lines, ) = @_;
|
|
|
4593 |
# Converts PowerShell block comment markers to C comments.
|
|
|
4594 |
|
|
|
4595 |
print "-> powershell_to_C()\n" if $opt_v > 2;
|
|
|
4596 |
|
|
|
4597 |
my $in_docstring = 0;
|
|
|
4598 |
foreach (@{$ra_lines}) {
|
|
|
4599 |
s{<#}{/*}g;
|
|
|
4600 |
s{#>}{*/}g;
|
|
|
4601 |
}
|
|
|
4602 |
|
|
|
4603 |
print "<- powershell_to_C\n" if $opt_v > 2;
|
|
|
4604 |
return @{$ra_lines};
|
|
|
4605 |
} # 1}}}
|
|
|
4606 |
sub smarty_to_C { # {{{1
|
|
|
4607 |
my ($ra_lines, ) = @_;
|
|
|
4608 |
# Converts Smarty comments to C comments.
|
|
|
4609 |
|
|
|
4610 |
print "-> smarty_to_C()\n" if $opt_v > 2;
|
|
|
4611 |
|
|
|
4612 |
foreach (@{$ra_lines}) {
|
|
|
4613 |
s[{\*][/*]g;
|
|
|
4614 |
s[\*}][*/]g;
|
|
|
4615 |
}
|
|
|
4616 |
|
|
|
4617 |
print "<- smarty_to_C\n" if $opt_v > 2;
|
|
|
4618 |
return @{$ra_lines};
|
|
|
4619 |
} # 1}}}
|
|
|
4620 |
sub determine_lit_type { # {{{1
|
|
|
4621 |
my ($file) = @_;
|
|
|
4622 |
|
|
|
4623 |
open (FILE, $file);
|
|
|
4624 |
while (<FILE>) {
|
|
|
4625 |
if (m/^\\begin\{code\}/) { close FILE; return 2; }
|
|
|
4626 |
if (m/^>\s/) { close FILE; return 1; }
|
|
|
4627 |
}
|
|
|
4628 |
|
|
|
4629 |
return 0;
|
|
|
4630 |
} # 1}}}
|
|
|
4631 |
sub remove_haskell_comments { # {{{1
|
|
|
4632 |
# Bulk of code taken from SLOCCount's haskell_count script.
|
|
|
4633 |
# Strips out {- .. -} and -- comments and counts the rest.
|
|
|
4634 |
# Pragmas, {-#...}, are counted as SLOC.
|
|
|
4635 |
# BUG: Doesn't handle strings with embedded block comment markers gracefully.
|
|
|
4636 |
# In practice, that shouldn't be a problem.
|
|
|
4637 |
my ($ra_lines, $file, ) = @_;
|
|
|
4638 |
|
|
|
4639 |
print "-> remove_haskell_comments\n" if $opt_v > 2;
|
|
|
4640 |
|
|
|
4641 |
my @save_lines = ();
|
|
|
4642 |
my $in_comment = 0;
|
|
|
4643 |
my $incomment = 0;
|
|
|
4644 |
my ($literate, $inlitblock) = (0,0);
|
|
|
4645 |
|
|
|
4646 |
$literate = 1 if $file =~ /\.lhs$/;
|
|
|
4647 |
if($literate) { $literate = determine_lit_type($file) }
|
|
|
4648 |
|
|
|
4649 |
foreach (@{$ra_lines}) {
|
|
|
4650 |
if ($literate == 1) {
|
|
|
4651 |
if (!s/^>//) { s/.*//; }
|
|
|
4652 |
} elsif ($literate == 2) {
|
|
|
4653 |
if ($inlitblock) {
|
|
|
4654 |
if (m/^\\end\{code\}/) { s/.*//; $inlitblock = 0; }
|
|
|
4655 |
} elsif (!$inlitblock) {
|
|
|
4656 |
if (m/^\\begin\{code\}/) { s/.*//; $inlitblock = 1; }
|
|
|
4657 |
else { s/.*//; }
|
|
|
4658 |
}
|
|
|
4659 |
}
|
|
|
4660 |
|
|
|
4661 |
if ($incomment) {
|
|
|
4662 |
if (m/\-\}/) { s/^.*?\-\}//; $incomment = 0;}
|
|
|
4663 |
else { s/.*//; }
|
|
|
4664 |
}
|
|
|
4665 |
if (!$incomment) {
|
|
|
4666 |
s/--.*//;
|
|
|
4667 |
s!{-[^#].*?-}!!g;
|
|
|
4668 |
if (m/{-/ && (!m/{-#/)) {
|
|
|
4669 |
s/{-.*//;
|
|
|
4670 |
$incomment = 1;
|
|
|
4671 |
}
|
|
|
4672 |
}
|
|
|
4673 |
if (m/\S/) { push @save_lines, $_; }
|
|
|
4674 |
}
|
|
|
4675 |
# if ($incomment) {print "ERROR: ended in comment in $ARGV\n";}
|
|
|
4676 |
|
|
|
4677 |
print "<- remove_haskell_comments\n" if $opt_v > 2;
|
|
|
4678 |
return @save_lines;
|
|
|
4679 |
} # 1}}}
|
|
|
4680 |
sub print_lines { # {{{1
|
|
|
4681 |
my ($file , # in
|
|
|
4682 |
$title , # in
|
|
|
4683 |
$ra_lines , # in
|
|
|
4684 |
) = @_;
|
|
|
4685 |
printf "->%-30s %s\n", $file, $title;
|
|
|
4686 |
for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
|
|
|
4687 |
printf "%5d | %s", $i+1, $ra_lines->[$i];
|
|
|
4688 |
print "\n" unless $ra_lines->[$i] =~ m{\n$}
|
|
|
4689 |
}
|
|
|
4690 |
} # 1}}}
|
|
|
4691 |
sub set_constants { # {{{1
|
|
|
4692 |
my ($rh_Language_by_Extension , # out
|
|
|
4693 |
$rh_Language_by_Script , # out
|
|
|
4694 |
$rh_Language_by_File , # out
|
|
|
4695 |
$rhaa_Filters_by_Language , # out
|
|
|
4696 |
$rh_Not_Code_Extension , # out
|
|
|
4697 |
$rh_Not_Code_Filename , # out
|
|
|
4698 |
$rh_Scale_Factor , # out
|
|
|
4699 |
$rh_Known_Binary_Archives , # out
|
|
|
4700 |
$rh_EOL_continuation_re , # out
|
|
|
4701 |
) = @_;
|
|
|
4702 |
# 1}}}
|
|
|
4703 |
%{$rh_Language_by_Extension} = ( # {{{1
|
|
|
4704 |
'abap' => 'ABAP' ,
|
|
|
4705 |
'ac' => 'm4' ,
|
|
|
4706 |
'ada' => 'Ada' ,
|
|
|
4707 |
'adb' => 'Ada' ,
|
|
|
4708 |
'ads' => 'Ada' ,
|
|
|
4709 |
'adso' => 'ADSO/IDSM' ,
|
|
|
4710 |
'ahk' => 'AutoHotkey' ,
|
|
|
4711 |
'am' => 'make' ,
|
|
|
4712 |
'ample' => 'AMPLE' ,
|
|
|
4713 |
'as' => 'ActionScript' ,
|
|
|
4714 |
'dofile' => 'AMPLE' ,
|
|
|
4715 |
'startup' => 'AMPLE' ,
|
|
|
4716 |
'asa' => 'ASP' ,
|
|
|
4717 |
'asax' => 'ASP.Net' ,
|
|
|
4718 |
'ascx' => 'ASP.Net' ,
|
|
|
4719 |
'asm' => 'Assembly' ,
|
|
|
4720 |
'asmx' => 'ASP.Net' ,
|
|
|
4721 |
'asp' => 'ASP' ,
|
|
|
4722 |
'aspx' => 'ASP.Net' ,
|
|
|
4723 |
'master' => 'ASP.Net' ,
|
|
|
4724 |
'sitemap' => 'ASP.Net' ,
|
|
|
4725 |
'cshtml' => 'Razor' ,
|
|
|
4726 |
'awk' => 'awk' ,
|
|
|
4727 |
'bash' => 'Bourne Again Shell' ,
|
|
|
4728 |
'bas' => 'Visual Basic' ,
|
|
|
4729 |
'dxl' => 'DOORS Extension Language',
|
|
|
4730 |
'bat' => 'DOS Batch' ,
|
|
|
4731 |
'BAT' => 'DOS Batch' ,
|
|
|
4732 |
'cmd' => 'DOS Batch' ,
|
|
|
4733 |
'CMD' => 'DOS Batch' ,
|
|
|
4734 |
'btm' => 'DOS Batch' ,
|
|
|
4735 |
'BTM' => 'DOS Batch' ,
|
|
|
4736 |
'build.xml' => 'Ant' ,
|
|
|
4737 |
'cbl' => 'COBOL' ,
|
|
|
4738 |
'CBL' => 'COBOL' ,
|
|
|
4739 |
'c' => 'C' ,
|
|
|
4740 |
'C' => 'C++' ,
|
|
|
4741 |
'cc' => 'C++' ,
|
|
|
4742 |
'c++' => 'C++' ,
|
|
|
4743 |
'ccs' => 'CCS' ,
|
|
|
4744 |
'cfc' => 'ColdFusion CFScript' ,
|
|
|
4745 |
'cfm' => 'ColdFusion' ,
|
|
|
4746 |
'cl' => 'Lisp/OpenCL' ,
|
|
|
4747 |
'clj' => 'Clojure' ,
|
|
|
4748 |
'cljs' => 'ClojureScript' ,
|
|
|
4749 |
'cls' => 'Visual Basic' , # also Apex Class
|
|
|
4750 |
'CMakeLists.txt' => 'CMake' ,
|
|
|
4751 |
'cmake' => 'CMake' ,
|
|
|
4752 |
'cob' => 'COBOL' ,
|
|
|
4753 |
'COB' => 'COBOL' ,
|
|
|
4754 |
'coffee' => 'CoffeeScript' ,
|
|
|
4755 |
'component' => 'Visualforce Component' ,
|
|
|
4756 |
'cpp' => 'C++' ,
|
|
|
4757 |
'cs' => 'C#' ,
|
|
|
4758 |
'csh' => 'C Shell' ,
|
|
|
4759 |
'css' => "CSS" ,
|
|
|
4760 |
'ctl' => 'Visual Basic' ,
|
|
|
4761 |
'cu' => 'CUDA' ,
|
|
|
4762 |
'cxx' => 'C++' ,
|
|
|
4763 |
'd' => 'D/dtrace' ,
|
|
|
4764 |
# in addition, .d can map to init.d files typically written as
|
|
|
4765 |
# bash or sh scripts
|
|
|
4766 |
'da' => 'DAL' ,
|
|
|
4767 |
'dart' => 'Dart' ,
|
|
|
4768 |
'def' => 'Windows Module Definition',
|
|
|
4769 |
'diff' => 'diff' ,
|
|
|
4770 |
'dmap' => 'NASTRAN DMAP' ,
|
|
|
4771 |
'dpr' => 'Pascal' ,
|
|
|
4772 |
'dita' => 'DITA' ,
|
|
|
4773 |
'dsr' => 'Visual Basic' ,
|
|
|
4774 |
'dtd' => 'DTD' ,
|
|
|
4775 |
'ec' => 'C' ,
|
|
|
4776 |
'ecpp' => 'ECPP' ,
|
|
|
4777 |
'el' => 'Lisp' ,
|
|
|
4778 |
'exs' => 'Elixir' ,
|
|
|
4779 |
'ex' => 'Elixir' ,
|
|
|
4780 |
'erb' => 'ERB' ,
|
|
|
4781 |
'ERB' => 'ERB' ,
|
|
|
4782 |
'erl' => 'Erlang' ,
|
|
|
4783 |
'exp' => 'Expect' ,
|
|
|
4784 |
'f77' => 'Fortran 77' ,
|
|
|
4785 |
'F77' => 'Fortran 77' ,
|
|
|
4786 |
'f90' => 'Fortran 90' ,
|
|
|
4787 |
'F90' => 'Fortran 90' ,
|
|
|
4788 |
'f95' => 'Fortran 95' ,
|
|
|
4789 |
'F95' => 'Fortran 95' ,
|
|
|
4790 |
'f' => 'Fortran 77' ,
|
|
|
4791 |
'F' => 'Fortran 77' ,
|
|
|
4792 |
'for' => 'Fortran 77' ,
|
|
|
4793 |
'FOR' => 'Fortran 77' ,
|
|
|
4794 |
'ftn' => 'Fortran 77' ,
|
|
|
4795 |
'FTN' => 'Fortran 77' ,
|
|
|
4796 |
'fmt' => 'Oracle Forms' ,
|
|
|
4797 |
'focexec' => 'Focus' ,
|
|
|
4798 |
'frm' => 'Visual Basic' ,
|
|
|
4799 |
'fs' => 'F#' ,
|
|
|
4800 |
'fsi' => 'F#' ,
|
|
|
4801 |
'gnumakefile' => 'make' ,
|
|
|
4802 |
'Gnumakefile' => 'make' ,
|
|
|
4803 |
'go' => 'Go' ,
|
|
|
4804 |
'gsp' => 'Grails' ,
|
|
|
4805 |
'groovy' => 'Groovy' ,
|
|
|
4806 |
'gant' => 'Groovy' ,
|
|
|
4807 |
'gradle' => 'Groovy' ,
|
|
|
4808 |
'h' => 'C/C++ Header' ,
|
|
|
4809 |
'H' => 'C/C++ Header' ,
|
|
|
4810 |
'hh' => 'C/C++ Header' ,
|
|
|
4811 |
'hpp' => 'C/C++ Header' ,
|
|
|
4812 |
'hb' => 'Harbour' ,
|
|
|
4813 |
'hrl' => 'Erlang' ,
|
|
|
4814 |
'hs' => 'Haskell' ,
|
|
|
4815 |
'hlsl' => 'HLSL' ,
|
|
|
4816 |
'shader' => 'HLSL' ,
|
|
|
4817 |
'cg' => 'HLSL' ,
|
|
|
4818 |
'cginc' => 'HLSL' ,
|
|
|
4819 |
'haml' => 'Haml' ,
|
|
|
4820 |
'handlebars' => 'Handlebars' ,
|
|
|
4821 |
'hbs' => 'Handlebars' ,
|
|
|
4822 |
'htm' => 'HTML' ,
|
|
|
4823 |
'html' => 'HTML' ,
|
|
|
4824 |
'i3' => 'Modula3' ,
|
|
|
4825 |
'idl' => 'IDL' ,
|
|
|
4826 |
'ism' => 'InstallShield' ,
|
|
|
4827 |
'pro' => 'IDL/Qt Project/Prolog' ,
|
|
|
4828 |
'ig' => 'Modula3' ,
|
|
|
4829 |
'il' => 'SKILL' ,
|
|
|
4830 |
'ils' => 'SKILL++' ,
|
|
|
4831 |
'inc' => 'PHP/Pascal' , # might be PHP or Pascal
|
|
|
4832 |
'ino' => 'Arduino Sketch' ,
|
|
|
4833 |
'pde' => 'Arduino Sketch' , # pre 1.0
|
|
|
4834 |
'itk' => 'Tcl/Tk' ,
|
|
|
4835 |
'java' => 'Java' ,
|
|
|
4836 |
'jcl' => 'JCL' , # IBM Job Control Lang.
|
|
|
4837 |
'jl' => 'Lisp/Julia' ,
|
|
|
4838 |
'js' => 'Javascript' ,
|
|
|
4839 |
'jsf' => 'JavaServer Faces' ,
|
|
|
4840 |
'xhtml' => 'JavaServer Faces' ,
|
|
|
4841 |
'json' => 'JSON' ,
|
|
|
4842 |
'jsp' => 'JSP' , # Java server pages
|
|
|
4843 |
'jspf' => 'JSP' , # Java server pages
|
|
|
4844 |
'vm' => 'Velocity Template Language' ,
|
|
|
4845 |
'ksc' => 'Kermit' ,
|
|
|
4846 |
'ksh' => 'Korn Shell' ,
|
|
|
4847 |
'kt' => 'Kotlin' ,
|
|
|
4848 |
'lhs' => 'Haskell' ,
|
|
|
4849 |
'l' => 'lex' ,
|
|
|
4850 |
'less' => 'LESS' ,
|
|
|
4851 |
'lsp' => 'Lisp' ,
|
|
|
4852 |
'lisp' => 'Lisp' ,
|
|
|
4853 |
'lua' => 'Lua' ,
|
|
|
4854 |
'm3' => 'Modula3' ,
|
|
|
4855 |
'm4' => 'm4' ,
|
|
|
4856 |
'makefile' => 'make' ,
|
|
|
4857 |
'Makefile' => 'make' ,
|
|
|
4858 |
'mc' => 'Windows Message File' ,
|
|
|
4859 |
'met' => 'Teamcenter met' ,
|
|
|
4860 |
'mg' => 'Modula3' ,
|
|
|
4861 |
# 'mli' => 'ML' , # ML not implemented
|
|
|
4862 |
# 'ml' => 'ML' ,
|
|
|
4863 |
'ml' => 'OCaml' ,
|
|
|
4864 |
'mli' => 'OCaml' ,
|
|
|
4865 |
'mly' => 'OCaml' ,
|
|
|
4866 |
'mll' => 'OCaml' ,
|
|
|
4867 |
'm' => 'MATLAB/Objective C/MUMPS/Mercury' ,
|
|
|
4868 |
'mm' => 'Objective C++' ,
|
|
|
4869 |
'mustache' => 'Mustache' ,
|
|
|
4870 |
'wdproj' => 'MSBuild script' ,
|
|
|
4871 |
'csproj' => 'MSBuild script' ,
|
|
|
4872 |
'vcproj' => 'MSBuild script' ,
|
|
|
4873 |
'wixproj' => 'MSBuild script' ,
|
|
|
4874 |
'vbproj' => 'MSBuild script' ,
|
|
|
4875 |
'mps' => 'MUMPS' ,
|
|
|
4876 |
'mth' => 'Teamcenter mth' ,
|
|
|
4877 |
'oscript' => 'LiveLink OScript' ,
|
|
|
4878 |
'pad' => 'Ada' , # Oracle Ada preprocessor
|
|
|
4879 |
'page' => 'Visualforce Page' ,
|
|
|
4880 |
'pas' => 'Pascal' ,
|
|
|
4881 |
'pcc' => 'C++' , # Oracle C++ preprocessor
|
|
|
4882 |
'perl' => 'Perl' ,
|
|
|
4883 |
'pfo' => 'Fortran 77' ,
|
|
|
4884 |
'pgc' => 'C' , # Postgres embedded C/C++
|
|
|
4885 |
'php3' => 'PHP' ,
|
|
|
4886 |
'php4' => 'PHP' ,
|
|
|
4887 |
'php5' => 'PHP' ,
|
|
|
4888 |
'php' => 'PHP' ,
|
|
|
4889 |
'pig' => 'Pig Latin' ,
|
|
|
4890 |
'plh' => 'Perl' ,
|
|
|
4891 |
'pl' => 'Perl/Prolog' ,
|
|
|
4892 |
'PL' => 'Perl/Prolog' ,
|
|
|
4893 |
'plx' => 'Perl' ,
|
|
|
4894 |
'pm' => 'Perl' ,
|
|
|
4895 |
'pom.xml' => 'Maven' ,
|
|
|
4896 |
'pom' => 'Maven' ,
|
|
|
4897 |
'P' => 'Prolog' ,
|
|
|
4898 |
'p' => 'Pascal' ,
|
|
|
4899 |
'pp' => 'Pascal/Puppet' ,
|
|
|
4900 |
'psql' => 'SQL' ,
|
|
|
4901 |
'py' => 'Python' ,
|
|
|
4902 |
'pyx' => 'Cython' ,
|
|
|
4903 |
'qml' => 'QML' ,
|
|
|
4904 |
'rb' => 'Ruby' ,
|
|
|
4905 |
'rake' => 'Ruby' ,
|
|
|
4906 |
# 'resx' => 'ASP.Net' ,
|
|
|
4907 |
'rex' => 'Oracle Reports' ,
|
|
|
4908 |
'rexx' => 'Rexx' ,
|
|
|
4909 |
'rhtml' => 'Ruby HTML' ,
|
|
|
4910 |
'rs' => 'Rust' ,
|
|
|
4911 |
's' => 'Assembly' ,
|
|
|
4912 |
'S' => 'Assembly' ,
|
|
|
4913 |
'SCA' => 'Visual Fox Pro' ,
|
|
|
4914 |
'sca' => 'Visual Fox Pro' ,
|
|
|
4915 |
'scala' => 'Scala' ,
|
|
|
4916 |
'sbl' => 'Softbridge Basic' ,
|
|
|
4917 |
'SBL' => 'Softbridge Basic' ,
|
|
|
4918 |
'sc' => 'Lisp' ,
|
|
|
4919 |
'scm' => 'Lisp' ,
|
|
|
4920 |
'sed' => 'sed' ,
|
|
|
4921 |
'ses' => 'Patran Command Language' ,
|
|
|
4922 |
'pcl' => 'Patran Command Language' ,
|
|
|
4923 |
'pl1' => 'PL/I' ,
|
|
|
4924 |
'purs' => 'PureScript' ,
|
|
|
4925 |
'prefab' => 'Unity-Prefab' ,
|
|
|
4926 |
'proto' => 'Protocol Buffers' ,
|
|
|
4927 |
'mat' => 'Unity-Prefab' ,
|
|
|
4928 |
'ps1' => 'PowerShell' ,
|
|
|
4929 |
'R' => 'R' ,
|
|
|
4930 |
'rkt' => 'Racket' ,
|
|
|
4931 |
'rktl' => 'Racket' ,
|
|
|
4932 |
'ss' => 'Racket' ,
|
|
|
4933 |
'scm' => 'Racket' ,
|
|
|
4934 |
'sch' => 'Racket' ,
|
|
|
4935 |
'scrbl' => 'Racket' ,
|
|
|
4936 |
'tsv' => 'RobotFramework' ,
|
|
|
4937 |
'robot' => 'RobotFramework' ,
|
|
|
4938 |
'rc' => 'Windows Resource File' ,
|
|
|
4939 |
'rc2' => 'Windows Resource File' ,
|
|
|
4940 |
'sas' => 'SAS' ,
|
|
|
4941 |
'sass' => 'SASS' ,
|
|
|
4942 |
'scss' => 'SASS' ,
|
|
|
4943 |
'sh' => 'Bourne Shell' ,
|
|
|
4944 |
'smarty' => 'Smarty' ,
|
|
|
4945 |
'sml' => 'Standard ML' ,
|
|
|
4946 |
'sig' => 'Standard ML' ,
|
|
|
4947 |
'fun' => 'Standard ML' ,
|
|
|
4948 |
'sql' => 'SQL' ,
|
|
|
4949 |
'SQL' => 'SQL' ,
|
|
|
4950 |
'sproc.sql' => 'SQL Stored Procedure' ,
|
|
|
4951 |
'spoc.sql' => 'SQL Stored Procedure' ,
|
|
|
4952 |
'spc.sql' => 'SQL Stored Procedure' ,
|
|
|
4953 |
'udf.sql' => 'SQL Stored Procedure' ,
|
|
|
4954 |
'data.sql' => 'SQL Data' ,
|
|
|
4955 |
'v' => 'Verilog-SystemVerilog' ,
|
|
|
4956 |
'sv' => 'Verilog-SystemVerilog' ,
|
|
|
4957 |
'svh' => 'Verilog-SystemVerilog' ,
|
|
|
4958 |
'tcl' => 'Tcl/Tk' ,
|
|
|
4959 |
'tcsh' => 'C Shell' ,
|
|
|
4960 |
'tk' => 'Tcl/Tk' ,
|
|
|
4961 |
'tpl' => 'Smarty' ,
|
|
|
4962 |
'trigger' => 'Apex Trigger' ,
|
|
|
4963 |
'ts' => 'TypeScript' ,
|
|
|
4964 |
'tss' => 'Titanium Style Sheet' ,
|
|
|
4965 |
'vala' => 'Vala' ,
|
|
|
4966 |
'vapi' => 'Vala Header' ,
|
|
|
4967 |
'vhd' => 'VHDL' ,
|
|
|
4968 |
'VHD' => 'VHDL' ,
|
|
|
4969 |
'vhdl' => 'VHDL' ,
|
|
|
4970 |
'VHDL' => 'VHDL' ,
|
|
|
4971 |
'vba' => 'Visual Basic' ,
|
|
|
4972 |
'VBA' => 'Visual Basic' ,
|
|
|
4973 |
# 'vbp' => 'Visual Basic' , # .vbp - autogenerated
|
|
|
4974 |
'vb' => 'Visual Basic' ,
|
|
|
4975 |
'VB' => 'Visual Basic' ,
|
|
|
4976 |
# 'vbw' => 'Visual Basic' , # .vbw - autogenerated
|
|
|
4977 |
'vbs' => 'Visual Basic' ,
|
|
|
4978 |
'VBS' => 'Visual Basic' ,
|
|
|
4979 |
'webinfo' => 'ASP.Net' ,
|
|
|
4980 |
'xml' => 'XML' ,
|
|
|
4981 |
'XML' => 'XML' ,
|
|
|
4982 |
'mxml' => 'MXML' ,
|
|
|
4983 |
'build' => 'NAnt script' ,
|
|
|
4984 |
'vim' => 'vim script' ,
|
|
|
4985 |
'swift' => 'Swift' ,
|
|
|
4986 |
'xaml' => 'XAML' ,
|
|
|
4987 |
'wxs' => 'WiX source' ,
|
|
|
4988 |
'wxi' => 'WiX include' ,
|
|
|
4989 |
'wxl' => 'WiX string localization' ,
|
|
|
4990 |
'prg' => 'xBase' ,
|
|
|
4991 |
'ch' => 'xBase Header' ,
|
|
|
4992 |
'xq' => 'XQuery' ,
|
|
|
4993 |
'xquery' => 'XQuery' ,
|
|
|
4994 |
'xsd' => 'XSD' ,
|
|
|
4995 |
'XSD' => 'XSD' ,
|
|
|
4996 |
'xslt' => 'XSLT' ,
|
|
|
4997 |
'XSLT' => 'XSLT' ,
|
|
|
4998 |
'xsl' => 'XSLT' ,
|
|
|
4999 |
'XSL' => 'XSLT' ,
|
|
|
5000 |
'y' => 'yacc' ,
|
|
|
5001 |
'yaml' => 'YAML' ,
|
|
|
5002 |
'yml' => 'YAML' ,
|
|
|
5003 |
);
|
|
|
5004 |
# 1}}}
|
|
|
5005 |
%{$rh_Language_by_Script} = ( # {{{1
|
|
|
5006 |
'awk' => 'awk' ,
|
|
|
5007 |
'bash' => 'Bourne Again Shell' ,
|
|
|
5008 |
'bc' => 'bc' ,# calculator
|
|
|
5009 |
'csh' => 'C Shell' ,
|
|
|
5010 |
'dmd' => 'D' ,
|
|
|
5011 |
'dtrace' => 'dtrace' ,
|
|
|
5012 |
'idl' => 'IDL' ,
|
|
|
5013 |
'kermit' => 'Kermit' ,
|
|
|
5014 |
'ksh' => 'Korn Shell' ,
|
|
|
5015 |
'lua' => 'Lua' ,
|
|
|
5016 |
'make' => 'make' ,
|
|
|
5017 |
'octave' => 'Octave' ,
|
|
|
5018 |
'perl5' => 'Perl' ,
|
|
|
5019 |
'perl' => 'Perl' ,
|
|
|
5020 |
'php' => 'PHP' ,
|
|
|
5021 |
'php5' => 'PHP' ,
|
|
|
5022 |
'python' => 'Python' ,
|
|
|
5023 |
'python2.6'=> 'Python' ,
|
|
|
5024 |
'python2.7'=> 'Python' ,
|
|
|
5025 |
'python3' => 'Python' ,
|
|
|
5026 |
'python3.3'=> 'Python' ,
|
|
|
5027 |
'python3.4'=> 'Python' ,
|
|
|
5028 |
'rexx' => 'Rexx' ,
|
|
|
5029 |
'regina' => 'Rexx' ,
|
|
|
5030 |
'ruby' => 'Ruby' ,
|
|
|
5031 |
'sed' => 'sed' ,
|
|
|
5032 |
'sh' => 'Bourne Shell' ,
|
|
|
5033 |
'swipl' => 'Prolog' ,
|
|
|
5034 |
'tcl' => 'Tcl/Tk' ,
|
|
|
5035 |
'tclsh' => 'Tcl/Tk' ,
|
|
|
5036 |
'tcsh' => 'C Shell' ,
|
|
|
5037 |
'wish' => 'Tcl/Tk' ,
|
|
|
5038 |
);
|
|
|
5039 |
# 1}}}
|
|
|
5040 |
%{$rh_Language_by_File} = ( # {{{1
|
|
|
5041 |
'Makefile' => 'make' ,
|
|
|
5042 |
'makefile' => 'make' ,
|
|
|
5043 |
'gnumakefile' => 'make' ,
|
|
|
5044 |
'Gnumakefile' => 'make' ,
|
|
|
5045 |
'CMakeLists.txt' => 'CMake' ,
|
|
|
5046 |
'build.xml' => 'Ant/XML' ,
|
|
|
5047 |
'pom.xml' => 'Maven/XML' ,
|
|
|
5048 |
'Rakefile' => 'Ruby' ,
|
|
|
5049 |
'rakefile' => 'Ruby' ,
|
|
|
5050 |
);
|
|
|
5051 |
# 1}}}
|
|
|
5052 |
%{$rhaa_Filters_by_Language} = ( # {{{1
|
|
|
5053 |
'(unknown)' => [ ],
|
|
|
5054 |
'ABAP' => [ [ 'remove_matches' , '^\*' ], ],
|
|
|
5055 |
'ActionScript' => [
|
|
|
5056 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5057 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5058 |
],
|
|
|
5059 |
|
|
|
5060 |
'ASP' => [ [ 'remove_matches' , '^\s*\47'], ], # \47 = '
|
|
|
5061 |
'ASP.Net' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
5062 |
'Ada' => [ [ 'remove_matches' , '^\s*--' ], ],
|
|
|
5063 |
'ADSO/IDSM' => [ [ 'remove_matches' , '^\s*\*[\+\!]' ], ],
|
|
|
5064 |
'AMPLE' => [ [ 'remove_matches' , '^\s*//' ], ],
|
|
|
5065 |
'Ant/XML' => [
|
|
|
5066 |
[ 'remove_html_comments', ],
|
|
|
5067 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5068 |
],
|
|
|
5069 |
'Ant' => [
|
|
|
5070 |
[ 'remove_html_comments', ],
|
|
|
5071 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5072 |
],
|
|
|
5073 |
'Apex Trigger' => [
|
|
|
5074 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5075 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5076 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5077 |
],
|
|
|
5078 |
'Arduino Sketch' => [ # same as C
|
|
|
5079 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5080 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5081 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5082 |
],
|
|
|
5083 |
'Assembly' => [
|
|
|
5084 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5085 |
[ 'remove_matches' , '^\s*;' ],
|
|
|
5086 |
[ 'remove_matches' , '^\s*\@' ],
|
|
|
5087 |
[ 'remove_matches' , '^\s*\|' ],
|
|
|
5088 |
[ 'remove_matches' , '^\s*!' ],
|
|
|
5089 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5090 |
[ 'remove_matches' , '^\s*--' ],
|
|
|
5091 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5092 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5093 |
[ 'remove_inline' , ';.*$' ],
|
|
|
5094 |
[ 'remove_inline' , '\@.*$' ],
|
|
|
5095 |
[ 'remove_inline' , '\|.*$' ],
|
|
|
5096 |
[ 'remove_inline' , '!.*$' ],
|
|
|
5097 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5098 |
[ 'remove_inline' , '--.*$' ],
|
|
|
5099 |
],
|
|
|
5100 |
'AutoHotkey' => [
|
|
|
5101 |
[ 'remove_matches' , '^\s*;' ],
|
|
|
5102 |
[ 'remove_inline' , ';.*$' ],
|
|
|
5103 |
],
|
|
|
5104 |
'awk' => [
|
|
|
5105 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5106 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5107 |
],
|
|
|
5108 |
'bc' => [
|
|
|
5109 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5110 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5111 |
],
|
|
|
5112 |
'Bourne Again Shell' => [
|
|
|
5113 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5114 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5115 |
],
|
|
|
5116 |
'Bourne Shell' => [
|
|
|
5117 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5118 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5119 |
],
|
|
|
5120 |
'C' => [
|
|
|
5121 |
[ 'remove_matches' , '^\s*//' ], # C99
|
|
|
5122 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5123 |
[ 'remove_inline' , '//.*$' ], # C99
|
|
|
5124 |
],
|
|
|
5125 |
'C++' => [
|
|
|
5126 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5127 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5128 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5129 |
],
|
|
|
5130 |
'C/C++ Header' => [
|
|
|
5131 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5132 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5133 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5134 |
],
|
|
|
5135 |
'Clojure' => [ [ 'remove_matches' , '^\s*;' ], ],
|
|
|
5136 |
'ClojureScript' => [ [ 'remove_matches' , '^\s*;' ], ],
|
|
|
5137 |
'CMake' => [
|
|
|
5138 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5139 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5140 |
],
|
|
|
5141 |
'CUDA' => [
|
|
|
5142 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5143 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5144 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5145 |
],
|
|
|
5146 |
'Cython' => [
|
|
|
5147 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5148 |
[ 'docstring_to_C' ],
|
|
|
5149 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5150 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5151 |
],
|
|
|
5152 |
'C#' => [
|
|
|
5153 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5154 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5155 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5156 |
],
|
|
|
5157 |
'CCS' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
5158 |
'CSS' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
5159 |
'COBOL' => [ [ 'remove_cobol_comments', ], ],
|
|
|
5160 |
'CoffeeScript' => [
|
|
|
5161 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5162 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5163 |
],
|
|
|
5164 |
'ColdFusion' => [ [ 'remove_html_comments', ],
|
|
|
5165 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
5166 |
'ColdFusion CFScript'=> [
|
|
|
5167 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5168 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5169 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5170 |
],
|
|
|
5171 |
'Crystal Reports' => [ [ 'remove_matches' , '^\s*//' ], ],
|
|
|
5172 |
'D/dtrace' => [ [ 'die' , ], ], # never called
|
|
|
5173 |
'D' => [
|
|
|
5174 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5175 |
[ 'remove_between_general', '/+', '+/' ],
|
|
|
5176 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5177 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5178 |
],
|
|
|
5179 |
'DAL' => [
|
|
|
5180 |
[ 'remove_between_general', '[', ']', ],
|
|
|
5181 |
],
|
|
|
5182 |
'Dart' => [
|
|
|
5183 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5184 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5185 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5186 |
],
|
|
|
5187 |
# diff is kind of weird: anything but a space in the first column
|
|
|
5188 |
# will count as code, with the exception of #, ---, +++. Spaces
|
|
|
5189 |
# in the first column denote context lines which aren't part of the
|
|
|
5190 |
# difference.
|
|
|
5191 |
'diff' => [
|
|
|
5192 |
[ 'remove_matches' , '^#' ],
|
|
|
5193 |
[ 'remove_matches' , '^\-\-\-' ],
|
|
|
5194 |
[ 'remove_matches' , '^\+\+\+' ],
|
|
|
5195 |
[ 'remove_matches' , '^\s' ],
|
|
|
5196 |
],
|
|
|
5197 |
'DITA' => [
|
|
|
5198 |
[ 'remove_html_comments', ],
|
|
|
5199 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5200 |
],
|
|
|
5201 |
'DOORS Extension Language' => [
|
|
|
5202 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5203 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5204 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5205 |
],
|
|
|
5206 |
'dtrace' => [
|
|
|
5207 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5208 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5209 |
],
|
|
|
5210 |
'ECPP' => [
|
|
|
5211 |
[ 'remove_between_general',
|
|
|
5212 |
'<%doc>', '</%doc>', ],
|
|
|
5213 |
[ 'remove_between_general',
|
|
|
5214 |
'<#' , '#>' , ],
|
|
|
5215 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5216 |
],
|
|
|
5217 |
'ERB' => [
|
|
|
5218 |
[ 'remove_between_general', '<%#', '%>' ],
|
|
|
5219 |
],
|
|
|
5220 |
'NASTRAN DMAP' => [
|
|
|
5221 |
[ 'remove_matches' , '^\s*\$' ],
|
|
|
5222 |
[ 'remove_inline' , '\$.*$' ],
|
|
|
5223 |
],
|
|
|
5224 |
'DOS Batch' => [ [ 'remove_matches' , '^\s*rem', ], ],
|
|
|
5225 |
'DTD' => [ [ 'remove_html_comments', ],
|
|
|
5226 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
5227 |
'Elixir' => [
|
|
|
5228 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5229 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5230 |
],
|
|
|
5231 |
'Erlang' => [
|
|
|
5232 |
[ 'remove_matches' , '^\s*%' ],
|
|
|
5233 |
[ 'remove_inline' , '%.*$' ],
|
|
|
5234 |
],
|
|
|
5235 |
'Expect' => [
|
|
|
5236 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5237 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5238 |
],
|
|
|
5239 |
'Focus' => [ [ 'remove_matches' , '^\s*\-\*' ], ],
|
|
|
5240 |
'Fortran 77' => [
|
|
|
5241 |
[ 'remove_f77_comments' , ],
|
|
|
5242 |
[ 'remove_inline' , '\!.*$' ],
|
|
|
5243 |
],
|
|
|
5244 |
'Fortran 90' => [
|
|
|
5245 |
[ 'remove_f77_comments' , ],
|
|
|
5246 |
[ 'remove_f90_comments' , ],
|
|
|
5247 |
[ 'remove_inline' , '\!.*$' ],
|
|
|
5248 |
],
|
|
|
5249 |
'Fortran 95' => [
|
|
|
5250 |
[ 'remove_f77_comments' , ],
|
|
|
5251 |
[ 'remove_f90_comments' , ],
|
|
|
5252 |
[ 'remove_inline' , '\!.*$' ],
|
|
|
5253 |
],
|
|
|
5254 |
'F#' => [
|
|
|
5255 |
[ 'call_regexp_common' , 'Pascal' ],
|
|
|
5256 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5257 |
],
|
|
|
5258 |
'Go' => [
|
|
|
5259 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5260 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5261 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5262 |
],
|
|
|
5263 |
'Grails' => [
|
|
|
5264 |
[ 'remove_html_comments', ],
|
|
|
5265 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5266 |
[ 'remove_jsp_comments' , ],
|
|
|
5267 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5268 |
[ 'add_newlines' , ],
|
|
|
5269 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5270 |
],
|
|
|
5271 |
'Groovy' => [
|
|
|
5272 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5273 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5274 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5275 |
],
|
|
|
5276 |
'Handlebars' => [
|
|
|
5277 |
[ 'remove_between_general', '{{!--', '--}}' ],
|
|
|
5278 |
[ 'remove_between_general', '{{!', '}}' ],
|
|
|
5279 |
[ 'remove_html_comments', ],
|
|
|
5280 |
],
|
|
|
5281 |
'Harbour' => [
|
|
|
5282 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5283 |
[ 'remove_matches' , '^\s*\&\&' ],
|
|
|
5284 |
[ 'remove_matches' , '^\s*\*' ],
|
|
|
5285 |
[ 'remove_matches' , '^\s*NOTE' ],
|
|
|
5286 |
[ 'remove_matches' , '^\s*note' ],
|
|
|
5287 |
[ 'remove_matches' , '^\s*Note' ],
|
|
|
5288 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5289 |
[ 'remove_inline' , '\&\&.*$' ],
|
|
|
5290 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5291 |
],
|
|
|
5292 |
'HLSL' => [
|
|
|
5293 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5294 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5295 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5296 |
],
|
|
|
5297 |
'Haml' => [
|
|
|
5298 |
[ 'remove_haml_block' , ],
|
|
|
5299 |
[ 'remove_html_comments', ],
|
|
|
5300 |
[ 'remove_matches' , '^\s*/\s*\S+' ],
|
|
|
5301 |
[ 'remove_matches' , '^\s*-#\s*\S+' ],
|
|
|
5302 |
],
|
|
|
5303 |
'HTML' => [
|
|
|
5304 |
[ 'remove_html_comments', ],
|
|
|
5305 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5306 |
],
|
|
|
5307 |
'Haskell' => [ [ 'remove_haskell_comments', '>filename<' ], ],
|
|
|
5308 |
'IDL' => [ [ 'remove_matches' , '^\s*;' ], ],
|
|
|
5309 |
'IDL/Qt Project/Prolog' => [ [ 'die' , ], ], # never called
|
|
|
5310 |
'InstallShield' => [ [ 'remove_html_comments', ],
|
|
|
5311 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
5312 |
'JSP' => [ [ 'remove_html_comments', ],
|
|
|
5313 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5314 |
[ 'remove_jsp_comments' , ],
|
|
|
5315 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5316 |
[ 'add_newlines' , ],
|
|
|
5317 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5318 |
],
|
|
|
5319 |
'JavaServer Faces' => [
|
|
|
5320 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5321 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5322 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5323 |
],
|
|
|
5324 |
'Java' => [
|
|
|
5325 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5326 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5327 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5328 |
],
|
|
|
5329 |
'Javascript' => [
|
|
|
5330 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5331 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5332 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5333 |
],
|
|
|
5334 |
'JCL' => [ [ 'remove_jcl_comments' , ], ],
|
|
|
5335 |
'JSON' => [ # ECMA-404, the JSON standard definition
|
|
|
5336 |
# makes no provision for JSON comments
|
|
|
5337 |
# so just use a placeholder filter
|
|
|
5338 |
[ 'remove_matches' , '^\s*$' ],
|
|
|
5339 |
],
|
|
|
5340 |
'Julia' => [
|
|
|
5341 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5342 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5343 |
[ 'remove_between_general', '#=', '=#' ],
|
|
|
5344 |
],
|
|
|
5345 |
'Kotlin' => [
|
|
|
5346 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5347 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5348 |
[ 'remove_between_general', '/*', '*/' ],
|
|
|
5349 |
],
|
|
|
5350 |
'LESS' => [
|
|
|
5351 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5352 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5353 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5354 |
],
|
|
|
5355 |
'Lisp' => [
|
|
|
5356 |
[ 'remove_matches' , '^\s*;' ],
|
|
|
5357 |
[ 'remove_between_general', '#|', '|#' ],
|
|
|
5358 |
],
|
|
|
5359 |
'Lisp/OpenCL' => [ [ 'die' , ], ], # never called
|
|
|
5360 |
'Lisp/Julia' => [ [ 'die' , ], ], # never called
|
|
|
5361 |
'LiveLink OScript' => [ [ 'remove_matches' , '^\s*//' ], ],
|
|
|
5362 |
# 'Lua' => [ [ 'call_regexp_common' , 'lua' ], ],
|
|
|
5363 |
'Lua' => [ [ 'remove_matches' , '^\s*\-\-' ], ],
|
|
|
5364 |
'make' => [
|
|
|
5365 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5366 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5367 |
],
|
|
|
5368 |
'MATLAB' => [
|
|
|
5369 |
[ 'remove_matches' , '^\s*%' ],
|
|
|
5370 |
[ 'remove_inline' , '%.*$' ],
|
|
|
5371 |
],
|
|
|
5372 |
'Maven/XML' => [
|
|
|
5373 |
[ 'remove_html_comments', ],
|
|
|
5374 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5375 |
],
|
|
|
5376 |
'Maven' => [
|
|
|
5377 |
[ 'remove_html_comments', ],
|
|
|
5378 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5379 |
],
|
|
|
5380 |
'Mercury' => [
|
|
|
5381 |
[ 'remove_inline' , '%.*$' ],
|
|
|
5382 |
[ 'remove_matches' , '^\s*%' ],
|
|
|
5383 |
],
|
|
|
5384 |
'Modula3' => [ [ 'call_regexp_common' , 'Pascal' ], ],
|
|
|
5385 |
# Modula 3 comments are (* ... *) so applying the Pascal filter
|
|
|
5386 |
# which also treats { ... } as a comment is not really correct.
|
|
|
5387 |
'Objective C' => [
|
|
|
5388 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5389 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5390 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5391 |
],
|
|
|
5392 |
'Objective C++' => [
|
|
|
5393 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5394 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5395 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5396 |
],
|
|
|
5397 |
'OCaml' => [
|
|
|
5398 |
[ 'call_regexp_common' , 'Pascal' ],
|
|
|
5399 |
],
|
|
|
5400 |
'OpenCL' => [
|
|
|
5401 |
[ 'remove_matches' , '^\s*//' ], # C99
|
|
|
5402 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5403 |
[ 'remove_inline' , '//.*$' ], # C99
|
|
|
5404 |
],
|
|
|
5405 |
'PHP/Pascal' => [ [ 'die' , ], ], # never called
|
|
|
5406 |
'MATLAB/Objective C/MUMPS/Mercury' => [ [ 'die' , ], ], # never called
|
|
|
5407 |
'MUMPS' => [ [ 'remove_matches' , '^\s*;' ], ],
|
|
|
5408 |
'Mustache' => [
|
|
|
5409 |
[ 'remove_between_general', '{{!', '}}' ],
|
|
|
5410 |
],
|
|
|
5411 |
'Octave' => [
|
|
|
5412 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5413 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5414 |
],
|
|
|
5415 |
'Oracle Forms' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
5416 |
'Oracle Reports' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
5417 |
'Pascal' => [
|
|
|
5418 |
[ 'remove_between_regex', '{[^$]', '}' ],
|
|
|
5419 |
[ 'remove_between_general', '(*', '*)' ],
|
|
|
5420 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5421 |
],
|
|
|
5422 |
####'Pascal' => [
|
|
|
5423 |
#### [ 'call_regexp_common' , 'Pascal' ],
|
|
|
5424 |
#### [ 'remove_matches' , '^\s*//' ],
|
|
|
5425 |
#### ],
|
|
|
5426 |
'Pascal/Puppet' => [ [ 'die' , ], ], # never called
|
|
|
5427 |
'Puppet' => [
|
|
|
5428 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5429 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5430 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5431 |
],
|
|
|
5432 |
'PureScript' => [
|
|
|
5433 |
[ 'remove_matches' , '^\s*--' ],
|
|
|
5434 |
[ 'remove_between_general', '{-', '-}' ],
|
|
|
5435 |
[ 'remove_inline' , '--.*$' ],
|
|
|
5436 |
],
|
|
|
5437 |
'Patran Command Language'=> [
|
|
|
5438 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5439 |
[ 'remove_matches' , '^\s*\$#' ],
|
|
|
5440 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5441 |
],
|
|
|
5442 |
'PL/I' => [
|
|
|
5443 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5444 |
],
|
|
|
5445 |
'Perl' => [ [ 'remove_below' , '^__(END|DATA)__'],
|
|
|
5446 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5447 |
[ 'remove_below_above' , '^=head1', '^=cut' ],
|
|
|
5448 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5449 |
],
|
|
|
5450 |
'Perl/Prolog' => [ [ 'die' , ], ], # never called
|
|
|
5451 |
'Pig Latin' => [
|
|
|
5452 |
[ 'remove_matches' , '^\s*--' ],
|
|
|
5453 |
[ 'remove_inline' , '--.*$' ],
|
|
|
5454 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5455 |
],
|
|
|
5456 |
'PowerShell' => [
|
|
|
5457 |
[ 'powershell_to_C' ],
|
|
|
5458 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5459 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5460 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5461 |
],
|
|
|
5462 |
'Prolog' => [
|
|
|
5463 |
[ 'remove_matches' , '^\s*\%' ],
|
|
|
5464 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5465 |
[ 'remove_inline' , '(//|\%).*$' ],
|
|
|
5466 |
],
|
|
|
5467 |
'Protocol Buffers' => [
|
|
|
5468 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5469 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5470 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5471 |
],
|
|
|
5472 |
'Python' => [
|
|
|
5473 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5474 |
[ 'docstring_to_C' ],
|
|
|
5475 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5476 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5477 |
],
|
|
|
5478 |
'PHP' => [
|
|
|
5479 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5480 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5481 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5482 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5483 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5484 |
],
|
|
|
5485 |
'QML' => [
|
|
|
5486 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5487 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5488 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5489 |
],
|
|
|
5490 |
'Qt Project' => [
|
|
|
5491 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5492 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5493 |
],
|
|
|
5494 |
'R' => [
|
|
|
5495 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5496 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5497 |
],
|
|
|
5498 |
'Racket' => [
|
|
|
5499 |
[ 'remove_matches' , '^\s*;' ],
|
|
|
5500 |
[ 'remove_inline' , ';.*$' ],
|
|
|
5501 |
],
|
|
|
5502 |
'Razor' => [
|
|
|
5503 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5504 |
[ 'remove_between_general', '@*', '*@' ],
|
|
|
5505 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5506 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5507 |
],
|
|
|
5508 |
'RobotFramework' => [
|
|
|
5509 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5510 |
[ 'remove_matches' , '^\s*Comment' ],
|
|
|
5511 |
[ 'remove_matches' , '^\s*\*{3}\s+(Variables|Test\s+Cases|Settings|Keywords)\s+\*{3}' ] ,
|
|
|
5512 |
[ 'remove_matches' , '^\s*\[(Documentation|Tags)\]' ],
|
|
|
5513 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5514 |
],
|
|
|
5515 |
'Rexx' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
5516 |
'Ruby' => [
|
|
|
5517 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5518 |
[ 'remove_below_above' , '^=begin', '^=end' ],
|
|
|
5519 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5520 |
],
|
|
|
5521 |
'Ruby HTML' => [ [ 'remove_html_comments', ],
|
|
|
5522 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
5523 |
'Rust' => [
|
|
|
5524 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5525 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5526 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5527 |
],
|
|
|
5528 |
'SAS' => [
|
|
|
5529 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5530 |
[ 'remove_between_general', '*', ';' ],
|
|
|
5531 |
],
|
|
|
5532 |
'SASS' => [
|
|
|
5533 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5534 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5535 |
],
|
|
|
5536 |
'Scala' => [
|
|
|
5537 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5538 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5539 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5540 |
],
|
|
|
5541 |
'SKILL' => [
|
|
|
5542 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5543 |
[ 'remove_matches' , '^\s*;' ],
|
|
|
5544 |
],
|
|
|
5545 |
'SKILL++' => [
|
|
|
5546 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5547 |
[ 'remove_matches' , '^\s*;' ],
|
|
|
5548 |
],
|
|
|
5549 |
'SQL' => [
|
|
|
5550 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5551 |
[ 'remove_matches' , '^\s*--' ],
|
|
|
5552 |
[ 'remove_inline' , '--.*$' ],
|
|
|
5553 |
],
|
|
|
5554 |
'SQL Stored Procedure'=> [
|
|
|
5555 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5556 |
[ 'remove_matches' , '^\s*--' ],
|
|
|
5557 |
[ 'remove_inline' , '--.*$' ],
|
|
|
5558 |
],
|
|
|
5559 |
'SQL Data' => [
|
|
|
5560 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5561 |
[ 'remove_matches' , '^\s*--' ],
|
|
|
5562 |
[ 'remove_inline' , '--.*$' ],
|
|
|
5563 |
],
|
|
|
5564 |
'sed' => [
|
|
|
5565 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5566 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5567 |
],
|
|
|
5568 |
'Smarty' => [
|
|
|
5569 |
[ 'smarty_to_C' ],
|
|
|
5570 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5571 |
],
|
|
|
5572 |
'Standard ML' => [
|
|
|
5573 |
[ 'remove_between_general', '(*', '*)' ],
|
|
|
5574 |
],
|
|
|
5575 |
'Swift' => [
|
|
|
5576 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5577 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5578 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5579 |
],
|
|
|
5580 |
|
|
|
5581 |
'm4' => [ [ 'remove_matches' , '^dnl ' ], ],
|
|
|
5582 |
'C Shell' => [
|
|
|
5583 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5584 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5585 |
],
|
|
|
5586 |
'Kermit' => [
|
|
|
5587 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5588 |
[ 'remove_matches' , '^\s*;' ],
|
|
|
5589 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5590 |
],
|
|
|
5591 |
'Korn Shell' => [
|
|
|
5592 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5593 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5594 |
],
|
|
|
5595 |
'Tcl/Tk' => [
|
|
|
5596 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5597 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5598 |
],
|
|
|
5599 |
'Teamcenter met' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
5600 |
'Teamcenter mth' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
5601 |
'Titanium Style Sheet' => [
|
|
|
5602 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5603 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5604 |
[ 'remove_between_regex', '/[^/]', '[^/]/' ],
|
|
|
5605 |
],
|
|
|
5606 |
'TypeScript' => [
|
|
|
5607 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5608 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5609 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5610 |
],
|
|
|
5611 |
'Unity-Prefab' => [
|
|
|
5612 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5613 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5614 |
],
|
|
|
5615 |
'Visual Fox Pro' => [
|
|
|
5616 |
[ 'remove_matches' , '^\s*\*' ],
|
|
|
5617 |
[ 'remove_inline' , '\*.*$' ],
|
|
|
5618 |
[ 'remove_matches' , '^\s*&&' ],
|
|
|
5619 |
[ 'remove_inline' , '&&.*$' ],
|
|
|
5620 |
],
|
|
|
5621 |
'Softbridge Basic' => [ [ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ],
|
|
|
5622 |
[ 'remove_matches' , '^\s*Attribute\s+'],
|
|
|
5623 |
[ 'remove_matches' , '^\s*\47'], ], # \47 = '
|
|
|
5624 |
# http://www.altium.com/files/learningguides/TR0114%20VHDL%20Language%20Reference.pdf
|
|
|
5625 |
'Vala' => [
|
|
|
5626 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5627 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5628 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5629 |
],
|
|
|
5630 |
'Vala Header' => [
|
|
|
5631 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5632 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5633 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5634 |
],
|
|
|
5635 |
'Verilog-SystemVerilog' => [
|
|
|
5636 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5637 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5638 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5639 |
],
|
|
|
5640 |
'VHDL' => [
|
|
|
5641 |
[ 'remove_matches' , '^\s*--' ],
|
|
|
5642 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5643 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5644 |
[ 'remove_inline' , '--.*$' ],
|
|
|
5645 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5646 |
],
|
|
|
5647 |
'vim script' => [
|
|
|
5648 |
[ 'remove_matches' , '^\s*"' ],
|
|
|
5649 |
[ 'remove_inline' , '".*$' ],
|
|
|
5650 |
],
|
|
|
5651 |
'Visual Basic' => [ [ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ],
|
|
|
5652 |
[ 'remove_matches' , '^\s*Attribute\s+'],
|
|
|
5653 |
[ 'remove_matches' , '^\s*\47'], ], # \47 = '
|
|
|
5654 |
'Visualforce Component' => [
|
|
|
5655 |
[ 'remove_html_comments', ],
|
|
|
5656 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5657 |
],
|
|
|
5658 |
'Visualforce Page' => [
|
|
|
5659 |
[ 'remove_html_comments', ],
|
|
|
5660 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5661 |
],
|
|
|
5662 |
'Velocity Template Language' => [
|
|
|
5663 |
[ 'remove_html_comments', ],
|
|
|
5664 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5665 |
[ 'remove_jsp_comments' , ],
|
|
|
5666 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5667 |
[ 'add_newlines' , ],
|
|
|
5668 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5669 |
],
|
|
|
5670 |
'Teamcenter def' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
5671 |
'Windows Module Definition' => [
|
|
|
5672 |
[ 'remove_matches' , '^\s*;' ],
|
|
|
5673 |
[ 'remove_inline' , ';.*$' ],
|
|
|
5674 |
],
|
|
|
5675 |
'yacc' => [
|
|
|
5676 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5677 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5678 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5679 |
],
|
|
|
5680 |
'YAML' => [
|
|
|
5681 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
5682 |
[ 'remove_inline' , '#.*$' ],
|
|
|
5683 |
],
|
|
|
5684 |
'lex' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
5685 |
'XAML' => [ [ 'remove_html_comments', ],
|
|
|
5686 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
5687 |
'xBase Header' => [
|
|
|
5688 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5689 |
[ 'remove_matches' , '^\s*\&\&' ],
|
|
|
5690 |
[ 'remove_matches' , '^\s*\*' ],
|
|
|
5691 |
[ 'remove_matches' , '^\s*NOTE' ],
|
|
|
5692 |
[ 'remove_matches' , '^\s*note' ],
|
|
|
5693 |
[ 'remove_matches' , '^\s*Note' ],
|
|
|
5694 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5695 |
[ 'remove_inline' , '\&\&.*$' ],
|
|
|
5696 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5697 |
],
|
|
|
5698 |
'xBase' => [
|
|
|
5699 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5700 |
[ 'remove_matches' , '^\s*\&\&' ],
|
|
|
5701 |
[ 'remove_matches' , '^\s*\*' ],
|
|
|
5702 |
[ 'remove_matches' , '^\s*NOTE' ],
|
|
|
5703 |
[ 'remove_matches' , '^\s*note' ],
|
|
|
5704 |
[ 'remove_matches' , '^\s*Note' ],
|
|
|
5705 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5706 |
[ 'remove_inline' , '\&\&.*$' ],
|
|
|
5707 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5708 |
],
|
|
|
5709 |
'MXML' => [
|
|
|
5710 |
[ 'remove_html_comments', ],
|
|
|
5711 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5712 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5713 |
[ 'add_newlines' , ],
|
|
|
5714 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5715 |
],
|
|
|
5716 |
'Windows Message File' => [
|
|
|
5717 |
[ 'remove_matches' , '^\s*;\s*//' ],
|
|
|
5718 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5719 |
[ 'remove_matches' , '^\s*;\s*$' ],
|
|
|
5720 |
# next line only hypothetical
|
|
|
5721 |
# [ 'remove_matches_2re' , '^\s*;\s*/\*',
|
|
|
5722 |
# '^\s*;\s*\*/', ],
|
|
|
5723 |
],
|
|
|
5724 |
'Windows Resource File' => [
|
|
|
5725 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
5726 |
[ 'remove_inline' , '//.*$' ],
|
|
|
5727 |
[ 'call_regexp_common' , 'C' ],
|
|
|
5728 |
],
|
|
|
5729 |
'WiX source' => [
|
|
|
5730 |
[ 'remove_html_comments', ],
|
|
|
5731 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5732 |
],
|
|
|
5733 |
'WiX include' => [
|
|
|
5734 |
[ 'remove_html_comments', ],
|
|
|
5735 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5736 |
],
|
|
|
5737 |
'WiX string localization' => [
|
|
|
5738 |
[ 'remove_html_comments', ],
|
|
|
5739 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5740 |
],
|
|
|
5741 |
'XML' => [
|
|
|
5742 |
[ 'remove_html_comments', ],
|
|
|
5743 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
5744 |
],
|
|
|
5745 |
'XQuery' => [
|
|
|
5746 |
[ 'remove_between_general', '(:', ':)' ],
|
|
|
5747 |
],
|
|
|
5748 |
'XSD' => [ [ 'remove_html_comments', ],
|
|
|
5749 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
5750 |
'XSLT' => [ [ 'remove_html_comments', ],
|
|
|
5751 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
5752 |
'NAnt script' => [ [ 'remove_html_comments', ],
|
|
|
5753 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
5754 |
'MSBuild script' => [ [ 'remove_html_comments', ],
|
|
|
5755 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
5756 |
);
|
|
|
5757 |
# 1}}}
|
|
|
5758 |
%{$rh_EOL_continuation_re} = ( # {{{1
|
|
|
5759 |
'ActionScript' => '\\\\$' ,
|
|
|
5760 |
'Assembly' => '\\\\$' ,
|
|
|
5761 |
'ASP' => '\\\\$' ,
|
|
|
5762 |
'ASP.Net' => '\\\\$' ,
|
|
|
5763 |
'Ada' => '\\\\$' ,
|
|
|
5764 |
'awk' => '\\\\$' ,
|
|
|
5765 |
'bc' => '\\\\$' ,
|
|
|
5766 |
'C' => '\\\\$' ,
|
|
|
5767 |
'C++' => '\\\\$' ,
|
|
|
5768 |
'C/C++ Header' => '\\\\$' ,
|
|
|
5769 |
'CMake' => '\\\\$' ,
|
|
|
5770 |
'Cython' => '\\\\$' ,
|
|
|
5771 |
'C#' => '\\\\$' ,
|
|
|
5772 |
'D' => '\\\\$' ,
|
|
|
5773 |
'Dart' => '\\\\$' ,
|
|
|
5774 |
'Expect' => '\\\\$' ,
|
|
|
5775 |
'Go' => '\\\\$' ,
|
|
|
5776 |
'IDL' => '\$\\$' ,
|
|
|
5777 |
'Java' => '\\\\$' ,
|
|
|
5778 |
'Javascript' => '\\\\$' ,
|
|
|
5779 |
'LESS' => '\\\\$' ,
|
|
|
5780 |
'Lua' => '\\\\$' ,
|
|
|
5781 |
'make' => '\\\\$' ,
|
|
|
5782 |
'MATLAB' => '\.\.\.\s*$' ,
|
|
|
5783 |
'MXML' => '\\\\$' ,
|
|
|
5784 |
'Objective C' => '\\\\$' ,
|
|
|
5785 |
'Objective C++' => '\\\\$' ,
|
|
|
5786 |
'OCaml' => '\\\\$' ,
|
|
|
5787 |
'Octave' => '\.\.\.\s*$' ,
|
|
|
5788 |
'Qt Project' => '\\\\$' ,
|
|
|
5789 |
'Patran Command Language'=> '\\\\$' ,
|
|
|
5790 |
'PowerShell' => '\\\\$' ,
|
|
|
5791 |
'Python' => '\\\\$' ,
|
|
|
5792 |
'R' => '\\\\$' ,
|
|
|
5793 |
'Ruby' => '\\\\$' ,
|
|
|
5794 |
'sed' => '\\\\$' ,
|
|
|
5795 |
'Swift' => '\\\\$' ,
|
|
|
5796 |
'Bourne Again Shell' => '\\\\$' ,
|
|
|
5797 |
'Bourne Shell' => '\\\\$' ,
|
|
|
5798 |
'C Shell' => '\\\\$' ,
|
|
|
5799 |
'Kermit' => '\\\\$' ,
|
|
|
5800 |
'Korn Shell' => '\\\\$' ,
|
|
|
5801 |
'Tcl/Tk' => '\\\\$' ,
|
|
|
5802 |
'TypeScript' => '\\\\$' ,
|
|
|
5803 |
'lex' => '\\\\$' ,
|
|
|
5804 |
'Vala' => '\\\\$' ,
|
|
|
5805 |
'Vala Header' => '\\\\$' ,
|
|
|
5806 |
);
|
|
|
5807 |
# 1}}}
|
|
|
5808 |
%{$rh_Not_Code_Extension} = ( # {{{1
|
|
|
5809 |
'1' => 1, # Man pages (documentation):
|
|
|
5810 |
'2' => 1,
|
|
|
5811 |
'3' => 1,
|
|
|
5812 |
'4' => 1,
|
|
|
5813 |
'5' => 1,
|
|
|
5814 |
'6' => 1,
|
|
|
5815 |
'7' => 1,
|
|
|
5816 |
'8' => 1,
|
|
|
5817 |
'9' => 1,
|
|
|
5818 |
'a' => 1, # Static object code.
|
|
|
5819 |
'ad' => 1, # X application default resource file.
|
|
|
5820 |
'afm' => 1, # font metrics
|
|
|
5821 |
'arc' => 1, # arc(1) archive
|
|
|
5822 |
'arj' => 1, # arj(1) archive
|
|
|
5823 |
'au' => 1, # Audio sound filearj(1) archive
|
|
|
5824 |
'bak' => 1, # Backup files - we only want to count the "real" files.
|
|
|
5825 |
'bdf' => 1,
|
|
|
5826 |
'bmp' => 1,
|
|
|
5827 |
'bz2' => 1, # bzip2(1) compressed file
|
|
|
5828 |
'csv' => 1, # comma separated values
|
|
|
5829 |
'desktop' => 1,
|
|
|
5830 |
'dic' => 1,
|
|
|
5831 |
'doc' => 1,
|
|
|
5832 |
'elc' => 1,
|
|
|
5833 |
'eps' => 1,
|
|
|
5834 |
'fig' => 1,
|
|
|
5835 |
'gif' => 1,
|
|
|
5836 |
'gz' => 1,
|
|
|
5837 |
'hdf' => 1, # hierarchical data format
|
|
|
5838 |
'in' => 1, # Debatable.
|
|
|
5839 |
'jpg' => 1,
|
|
|
5840 |
'kdelnk' => 1,
|
|
|
5841 |
'man' => 1,
|
|
|
5842 |
'mf' => 1,
|
|
|
5843 |
'mp3' => 1,
|
|
|
5844 |
'n' => 1,
|
|
|
5845 |
'o' => 1, # Object code is generated from source code.
|
|
|
5846 |
'pbm' => 1,
|
|
|
5847 |
'pdf' => 1,
|
|
|
5848 |
'pfb' => 1,
|
|
|
5849 |
'png' => 1,
|
|
|
5850 |
'po' => 1,
|
|
|
5851 |
'ps' => 1, # Postscript is _USUALLY_ generated automatically.
|
|
|
5852 |
'sgm' => 1,
|
|
|
5853 |
'sgml' => 1,
|
|
|
5854 |
'so' => 1, # Dynamically-loaded object code.
|
|
|
5855 |
'Tag' => 1,
|
|
|
5856 |
'tex' => 1,
|
|
|
5857 |
'text' => 1,
|
|
|
5858 |
'tfm' => 1,
|
|
|
5859 |
'tgz' => 1, # gzipped tarball
|
|
|
5860 |
'tiff' => 1,
|
|
|
5861 |
'txt' => 1,
|
|
|
5862 |
'vf' => 1,
|
|
|
5863 |
'wav' => 1,
|
|
|
5864 |
'xbm' => 1,
|
|
|
5865 |
'xpm' => 1,
|
|
|
5866 |
'Y' => 1, # file compressed with "Yabba"
|
|
|
5867 |
'Z' => 1, # file compressed with "compress"
|
|
|
5868 |
'zip' => 1, # zip archive
|
|
|
5869 |
); # 1}}}
|
|
|
5870 |
%{$rh_Not_Code_Filename} = ( # {{{1
|
|
|
5871 |
'AUTHORS' => 1,
|
|
|
5872 |
'BUGS' => 1,
|
|
|
5873 |
'BUGS' => 1,
|
|
|
5874 |
'Changelog' => 1,
|
|
|
5875 |
'ChangeLog' => 1,
|
|
|
5876 |
'ChangeLog' => 1,
|
|
|
5877 |
'Changes' => 1,
|
|
|
5878 |
'CHANGES' => 1,
|
|
|
5879 |
'COPYING' => 1,
|
|
|
5880 |
'COPYING' => 1,
|
|
|
5881 |
'.cvsignore' => 1,
|
|
|
5882 |
'Entries' => 1,
|
|
|
5883 |
'FAQ' => 1,
|
|
|
5884 |
'iconfig.h' => 1, # Skip "iconfig.h" files; they're used in Imakefiles.
|
|
|
5885 |
'INSTALL' => 1,
|
|
|
5886 |
'MAINTAINERS' => 1,
|
|
|
5887 |
'MD5SUMS' => 1,
|
|
|
5888 |
'NEWS' => 1,
|
|
|
5889 |
'readme' => 1,
|
|
|
5890 |
'Readme' => 1,
|
|
|
5891 |
'README' => 1,
|
|
|
5892 |
'README.tk' => 1, # used in kdemultimedia, it's confusing.
|
|
|
5893 |
'Repository' => 1,
|
|
|
5894 |
'Root' => 1, # CVS
|
|
|
5895 |
'TODO' => 1,
|
|
|
5896 |
);
|
|
|
5897 |
# 1}}}
|
|
|
5898 |
%{$rh_Scale_Factor} = ( # {{{1
|
|
|
5899 |
'(unknown)' => 0.00,
|
|
|
5900 |
'1032/af' => 5.00,
|
|
|
5901 |
'1st generation default' => 0.25,
|
|
|
5902 |
'2nd generation default' => 0.75,
|
|
|
5903 |
'3rd generation default' => 1.00,
|
|
|
5904 |
'4th generation default' => 4.00,
|
|
|
5905 |
'5th generation default' => 16.00,
|
|
|
5906 |
'aas macro' => 0.88,
|
|
|
5907 |
'abap/4' => 5.00,
|
|
|
5908 |
'ABAP' => 5.00,
|
|
|
5909 |
'accel' => 4.21,
|
|
|
5910 |
'access' => 2.11,
|
|
|
5911 |
'ActionScript' => 1.36,
|
|
|
5912 |
'actor' => 3.81,
|
|
|
5913 |
'acumen' => 2.86,
|
|
|
5914 |
'Ada' => 0.52,
|
|
|
5915 |
'Ada 83' => 1.13,
|
|
|
5916 |
'Ada 95' => 1.63,
|
|
|
5917 |
'adr/dl' => 2.00,
|
|
|
5918 |
'adr/ideal/pdl' => 4.00,
|
|
|
5919 |
'ads/batch' => 4.00,
|
|
|
5920 |
'ads/online' => 4.00,
|
|
|
5921 |
'ADSO/IDSM' => 3.00,
|
|
|
5922 |
'advantage' => 2.11,
|
|
|
5923 |
'ai shell default' => 1.63,
|
|
|
5924 |
'ai shells' => 1.63,
|
|
|
5925 |
'algol 68' => 0.75,
|
|
|
5926 |
'algol w' => 0.75,
|
|
|
5927 |
'ambush' => 2.50,
|
|
|
5928 |
'aml' => 1.63,
|
|
|
5929 |
'AMPLE' => 2.00,
|
|
|
5930 |
'Ant/XML' => 1.90,
|
|
|
5931 |
'Ant' => 1.90,
|
|
|
5932 |
'amppl ii' => 1.25,
|
|
|
5933 |
'ansi basic' => 1.25,
|
|
|
5934 |
'ansi cobol 74' => 0.75,
|
|
|
5935 |
'ansi cobol 85' => 0.88,
|
|
|
5936 |
'SQL' => 6.15,
|
|
|
5937 |
'SQL Stored Procedure' => 6.15,
|
|
|
5938 |
'SQL Data' => 1.00,
|
|
|
5939 |
'answer/db' => 6.15,
|
|
|
5940 |
'apl 360/370' => 2.50,
|
|
|
5941 |
'apl default' => 2.50,
|
|
|
5942 |
'apl*plus' => 2.50,
|
|
|
5943 |
'applesoft basic' => 0.63,
|
|
|
5944 |
'application builder' => 4.00,
|
|
|
5945 |
'application manager' => 2.22,
|
|
|
5946 |
'aps' => 0.96,
|
|
|
5947 |
'aps' => 4.71,
|
|
|
5948 |
'apt' => 1.13,
|
|
|
5949 |
'aptools' => 4.00,
|
|
|
5950 |
'arc' => 1.63,
|
|
|
5951 |
'ariel' => 0.75,
|
|
|
5952 |
'arity' => 1.63,
|
|
|
5953 |
'art' => 1.63,
|
|
|
5954 |
'art enterprise' => 1.74,
|
|
|
5955 |
'artemis' => 2.00,
|
|
|
5956 |
'artim' => 1.74,
|
|
|
5957 |
'as/set' => 4.21,
|
|
|
5958 |
'asi/inquiry' => 6.15,
|
|
|
5959 |
'ask windows' => 1.74,
|
|
|
5960 |
'asa' => 1.29,
|
|
|
5961 |
'ASP' => 1.29,
|
|
|
5962 |
'ASP.Net' => 1.29,
|
|
|
5963 |
'aspx' => 1.29,
|
|
|
5964 |
'asax' => 1.29,
|
|
|
5965 |
'ascx' => 1.29,
|
|
|
5966 |
'asmx' => 1.29,
|
|
|
5967 |
'config' => 1.29,
|
|
|
5968 |
'webinfo' => 1.29,
|
|
|
5969 |
'CCS' => 5.33,
|
|
|
5970 |
'Apex Trigger' => 1.4 ,
|
|
|
5971 |
'Arduino Sketch' => 1.00,
|
|
|
5972 |
'Assembly' => 0.25,
|
|
|
5973 |
'Assembly (macro)' => 0.51,
|
|
|
5974 |
'associative default' => 1.25,
|
|
|
5975 |
'autocoder' => 0.25,
|
|
|
5976 |
'AutoHotkey' => 1.29,
|
|
|
5977 |
'awk' => 3.81,
|
|
|
5978 |
'aztec c' => 0.63,
|
|
|
5979 |
'balm' => 0.75,
|
|
|
5980 |
'base sas' => 1.51,
|
|
|
5981 |
'basic' => 0.75,
|
|
|
5982 |
'basic a' => 0.63,
|
|
|
5983 |
'bc' => 1.50,
|
|
|
5984 |
'berkeley pascal' => 0.88,
|
|
|
5985 |
'better basic' => 0.88,
|
|
|
5986 |
'bliss' => 0.75,
|
|
|
5987 |
'bmsgen' => 2.22,
|
|
|
5988 |
'boeingcalc' => 13.33,
|
|
|
5989 |
'bteq' => 6.15,
|
|
|
5990 |
'C' => 0.77,
|
|
|
5991 |
'c set 2' => 0.88,
|
|
|
5992 |
'C#' => 1.36,
|
|
|
5993 |
'C++' => 1.51,
|
|
|
5994 |
'c86plus' => 0.63,
|
|
|
5995 |
'cadbfast' => 2.00,
|
|
|
5996 |
'caearl' => 2.86,
|
|
|
5997 |
'cast' => 1.63,
|
|
|
5998 |
'cbasic' => 0.88,
|
|
|
5999 |
'cdadl' => 4.00,
|
|
|
6000 |
'cellsim' => 1.74,
|
|
|
6001 |
'ColdFusion' => 4.00,
|
|
|
6002 |
'ColdFusion CFScript' => 4.00,
|
|
|
6003 |
'chili' => 0.75,
|
|
|
6004 |
'chill' => 0.75,
|
|
|
6005 |
'cics' => 1.74,
|
|
|
6006 |
'clarion' => 1.38,
|
|
|
6007 |
'clascal' => 1.00,
|
|
|
6008 |
'cli' => 2.50,
|
|
|
6009 |
'clipper' => 2.05,
|
|
|
6010 |
'clipper db' => 2.00,
|
|
|
6011 |
'clos' => 3.81,
|
|
|
6012 |
'Clojure' => 1.25,
|
|
|
6013 |
'ClojureScript' => 1.25,
|
|
|
6014 |
'clout' => 2.00,
|
|
|
6015 |
'CMake' => 1.00,
|
|
|
6016 |
'cms2' => 0.75,
|
|
|
6017 |
'cmsgen' => 4.21,
|
|
|
6018 |
'COBOL' => 1.04,
|
|
|
6019 |
'COBOL ii' => 0.75,
|
|
|
6020 |
'COBOL/400' => 0.88,
|
|
|
6021 |
'cobra' => 4.00,
|
|
|
6022 |
'codecenter' => 2.22,
|
|
|
6023 |
'cofac' => 2.22,
|
|
|
6024 |
'CoffeeScript' => 2.00,
|
|
|
6025 |
'cogen' => 2.22,
|
|
|
6026 |
'cognos' => 2.22,
|
|
|
6027 |
'cogo' => 1.13,
|
|
|
6028 |
'comal' => 1.00,
|
|
|
6029 |
'comit ii' => 1.25,
|
|
|
6030 |
'common lisp' => 1.25,
|
|
|
6031 |
'concurrent pascal' => 1.00,
|
|
|
6032 |
'conniver' => 1.25,
|
|
|
6033 |
'cool:gen/ief' => 2.58,
|
|
|
6034 |
'coral 66' => 0.75,
|
|
|
6035 |
'corvet' => 4.21,
|
|
|
6036 |
'corvision' => 5.33,
|
|
|
6037 |
'cpl' => 0.50,
|
|
|
6038 |
'Crystal Reports' => 4.00,
|
|
|
6039 |
'csl' => 1.63,
|
|
|
6040 |
'csp' => 1.51,
|
|
|
6041 |
'cssl' => 1.74,
|
|
|
6042 |
'CSS' => 1.0,
|
|
|
6043 |
'culprit' => 1.57,
|
|
|
6044 |
'CUDA' => 1.00,
|
|
|
6045 |
'cxpert' => 1.63,
|
|
|
6046 |
'cygnet' => 4.21,
|
|
|
6047 |
'D' => 1.70,
|
|
|
6048 |
'DAL' => 1.50,
|
|
|
6049 |
'Dart' => 2.00,
|
|
|
6050 |
'data base default' => 2.00,
|
|
|
6051 |
'dataflex' => 2.00,
|
|
|
6052 |
'datatrieve' => 4.00,
|
|
|
6053 |
'dbase iii' => 2.00,
|
|
|
6054 |
'dbase iv' => 1.54,
|
|
|
6055 |
'dcl' => 0.38,
|
|
|
6056 |
'diff' => 1.00,
|
|
|
6057 |
'decision support default' => 2.22,
|
|
|
6058 |
'decrally' => 2.00,
|
|
|
6059 |
'delphi' => 2.76,
|
|
|
6060 |
'DITA' => 1.90,
|
|
|
6061 |
'dl/1' => 2.00,
|
|
|
6062 |
'dtrace' => 2.00,
|
|
|
6063 |
'NASTRAN DMAP' => 2.35,
|
|
|
6064 |
'dna4' => 4.21,
|
|
|
6065 |
'DOORS Extension Language' => 1.50,
|
|
|
6066 |
'DOS Batch' => 0.63,
|
|
|
6067 |
'dsp assembly' => 0.50,
|
|
|
6068 |
'dtabl' => 1.74,
|
|
|
6069 |
'dtipt' => 1.74,
|
|
|
6070 |
'dyana' => 1.13,
|
|
|
6071 |
'dynamoiii' => 1.74,
|
|
|
6072 |
'easel' => 2.76,
|
|
|
6073 |
'easy' => 1.63,
|
|
|
6074 |
'easytrieve+' => 2.35,
|
|
|
6075 |
'eclipse' => 1.63,
|
|
|
6076 |
'ECPP' => 1.90,
|
|
|
6077 |
'eda/sql' => 6.67,
|
|
|
6078 |
'edscheme 3.4' => 1.51,
|
|
|
6079 |
'eiffel' => 3.81,
|
|
|
6080 |
'Elixir' => 2.11,
|
|
|
6081 |
'enform' => 1.74,
|
|
|
6082 |
'englishbased default' => 1.51,
|
|
|
6083 |
'ensemble' => 2.76,
|
|
|
6084 |
'epos' => 4.00,
|
|
|
6085 |
'ERB' => 2.00,
|
|
|
6086 |
'Erlang' => 2.11,
|
|
|
6087 |
'esf' => 2.00,
|
|
|
6088 |
'espadvisor' => 1.63,
|
|
|
6089 |
'espl/i' => 1.13,
|
|
|
6090 |
'euclid' => 0.75,
|
|
|
6091 |
'excel' => 1.74,
|
|
|
6092 |
'excel 12' => 13.33,
|
|
|
6093 |
'excel 34' => 13.33,
|
|
|
6094 |
'excel 5' => 13.33,
|
|
|
6095 |
'express' => 2.22,
|
|
|
6096 |
'exsys' => 1.63,
|
|
|
6097 |
'extended common lisp' => 1.43,
|
|
|
6098 |
'eznomad' => 2.22,
|
|
|
6099 |
'facets' => 4.00,
|
|
|
6100 |
'factorylink iv' => 2.76,
|
|
|
6101 |
'fame' => 2.22,
|
|
|
6102 |
'filemaker pro' => 2.22,
|
|
|
6103 |
'flavors' => 2.76,
|
|
|
6104 |
'flex' => 1.74,
|
|
|
6105 |
'flexgen' => 2.76,
|
|
|
6106 |
'Focus' => 1.90,
|
|
|
6107 |
'foil' => 1.51,
|
|
|
6108 |
'forte' => 4.44,
|
|
|
6109 |
'forth' => 1.25,
|
|
|
6110 |
'Fortran 66' => 0.63,
|
|
|
6111 |
'Fortran 77' => 0.75,
|
|
|
6112 |
'Fortran 90' => 1.00,
|
|
|
6113 |
'Fortran 95' => 1.13,
|
|
|
6114 |
'Fortran II' => 0.63,
|
|
|
6115 |
'foundation' => 2.76,
|
|
|
6116 |
'foxpro' => 2.29,
|
|
|
6117 |
'foxpro 1' => 2.00,
|
|
|
6118 |
'foxpro 2.5' => 2.35,
|
|
|
6119 |
'framework' => 13.33,
|
|
|
6120 |
'F#' => 2.50,
|
|
|
6121 |
'g2' => 1.63,
|
|
|
6122 |
'gamma' => 5.00,
|
|
|
6123 |
'genascript' => 2.96,
|
|
|
6124 |
'gener/ol' => 6.15,
|
|
|
6125 |
'genexus' => 5.33,
|
|
|
6126 |
'genifer' => 4.21,
|
|
|
6127 |
'geode 2.0' => 5.00,
|
|
|
6128 |
'gfa basic' => 2.35,
|
|
|
6129 |
'gml' => 1.74,
|
|
|
6130 |
'golden common lisp' => 1.25,
|
|
|
6131 |
'gpss' => 1.74,
|
|
|
6132 |
'guest' => 2.86,
|
|
|
6133 |
'guru' => 1.63,
|
|
|
6134 |
'Go' => 2.50,
|
|
|
6135 |
'Grails' => 1.48,
|
|
|
6136 |
'Groovy' => 4.10,
|
|
|
6137 |
'gw basic' => 0.82,
|
|
|
6138 |
'Harbour' => 2.00,
|
|
|
6139 |
'Haskell' => 2.11,
|
|
|
6140 |
'high c' => 0.63,
|
|
|
6141 |
'hlevel' => 1.38,
|
|
|
6142 |
'hp basic' => 0.63,
|
|
|
6143 |
'Haml' => 2.50,
|
|
|
6144 |
'Handlebars' => 2.50,
|
|
|
6145 |
'HTML' => 1.90,
|
|
|
6146 |
'XML' => 1.90,
|
|
|
6147 |
'MXML' => 1.90,
|
|
|
6148 |
'XSLT' => 1.90,
|
|
|
6149 |
'DTD' => 1.90,
|
|
|
6150 |
'XSD' => 1.90,
|
|
|
6151 |
'NAnt script' => 1.90,
|
|
|
6152 |
'MSBuild script' => 1.90,
|
|
|
6153 |
'HLSL' => 2.00,
|
|
|
6154 |
'HTML 2' => 5.00,
|
|
|
6155 |
'HTML 3' => 5.33,
|
|
|
6156 |
'huron' => 5.00,
|
|
|
6157 |
'ibm adf i' => 4.00,
|
|
|
6158 |
'ibm adf ii' => 4.44,
|
|
|
6159 |
'ibm advanced basic' => 0.82,
|
|
|
6160 |
'ibm cics/vs' => 2.00,
|
|
|
6161 |
'ibm compiled basic' => 0.88,
|
|
|
6162 |
'ibm vs cobol' => 0.75,
|
|
|
6163 |
'ibm vs cobol ii' => 0.88,
|
|
|
6164 |
'ices' => 1.13,
|
|
|
6165 |
'icon' => 1.00,
|
|
|
6166 |
'ideal' => 1.54,
|
|
|
6167 |
'idms' => 2.00,
|
|
|
6168 |
'ief' => 5.71,
|
|
|
6169 |
'ief/cool:gen' => 2.58,
|
|
|
6170 |
'iew' => 5.71,
|
|
|
6171 |
'ifps/plus' => 2.50,
|
|
|
6172 |
'imprs' => 2.00,
|
|
|
6173 |
'informix' => 2.58,
|
|
|
6174 |
'ingres' => 2.00,
|
|
|
6175 |
'inquire' => 6.15,
|
|
|
6176 |
'insight2' => 1.63,
|
|
|
6177 |
'install/1' => 5.00,
|
|
|
6178 |
'InstallShield' => 1.90,
|
|
|
6179 |
'intellect' => 1.51,
|
|
|
6180 |
'interlisp' => 1.38,
|
|
|
6181 |
'interpreted basic' => 0.75,
|
|
|
6182 |
'interpreted c' => 0.63,
|
|
|
6183 |
'iqlisp' => 1.38,
|
|
|
6184 |
'iqrp' => 6.15,
|
|
|
6185 |
'j2ee' => 1.60,
|
|
|
6186 |
'janus' => 1.13,
|
|
|
6187 |
'Java' => 1.36,
|
|
|
6188 |
'Javascript' => 1.48,
|
|
|
6189 |
'JavaServer Faces' => 1.5 ,
|
|
|
6190 |
'JSON' => 2.50,
|
|
|
6191 |
'JSP' => 1.48,
|
|
|
6192 |
'Velocity Template Language' => 1.00,
|
|
|
6193 |
'JCL' => 1.67,
|
|
|
6194 |
'joss' => 0.75,
|
|
|
6195 |
'jovial' => 0.75,
|
|
|
6196 |
'jsp' => 1.36,
|
|
|
6197 |
'kappa' => 2.00,
|
|
|
6198 |
'kbms' => 1.63,
|
|
|
6199 |
'kcl' => 1.25,
|
|
|
6200 |
'kee' => 1.63,
|
|
|
6201 |
'keyplus' => 2.00,
|
|
|
6202 |
'kl' => 1.25,
|
|
|
6203 |
'klo' => 1.25,
|
|
|
6204 |
'knowol' => 1.63,
|
|
|
6205 |
'krl' => 1.38,
|
|
|
6206 |
'Kermit' => 2.00,
|
|
|
6207 |
'Korn Shell' => 3.81,
|
|
|
6208 |
'Kotlin' => 2.00,
|
|
|
6209 |
'ladder logic' => 2.22,
|
|
|
6210 |
'lambit/l' => 1.25,
|
|
|
6211 |
'lattice c' => 0.63,
|
|
|
6212 |
'LESS' => 1.50,
|
|
|
6213 |
'liana' => 0.63,
|
|
|
6214 |
'lilith' => 1.13,
|
|
|
6215 |
'linc ii' => 5.71,
|
|
|
6216 |
'Lisp' => 1.25,
|
|
|
6217 |
'LiveLink OScript' => 3.5 ,
|
|
|
6218 |
'loglisp' => 1.38,
|
|
|
6219 |
'loops' => 3.81,
|
|
|
6220 |
'lotus 123 dos' => 13.33,
|
|
|
6221 |
'lotus macros' => 0.75,
|
|
|
6222 |
'lotus notes' => 3.64,
|
|
|
6223 |
'lucid 3d' => 13.33,
|
|
|
6224 |
'lyric' => 1.51,
|
|
|
6225 |
'm4' => 1.00,
|
|
|
6226 |
'm' => 5.00,
|
|
|
6227 |
'macforth' => 1.25,
|
|
|
6228 |
'mach1' => 2.00,
|
|
|
6229 |
'machine language' => 0.13,
|
|
|
6230 |
'maestro' => 5.00,
|
|
|
6231 |
'magec' => 5.00,
|
|
|
6232 |
'magik' => 3.81,
|
|
|
6233 |
'Lake' => 3.81,
|
|
|
6234 |
'make' => 2.50,
|
|
|
6235 |
'mantis' => 2.96,
|
|
|
6236 |
'mapper' => 0.99,
|
|
|
6237 |
'mark iv' => 2.00,
|
|
|
6238 |
'mark v' => 2.22,
|
|
|
6239 |
'mathcad' => 16.00,
|
|
|
6240 |
'Maven' => 1.90,
|
|
|
6241 |
'mdl' => 2.22,
|
|
|
6242 |
'mentor' => 1.51,
|
|
|
6243 |
'mesa' => 0.75,
|
|
|
6244 |
'microfocus cobol' => 1.00,
|
|
|
6245 |
'microforth' => 1.25,
|
|
|
6246 |
'microsoft c' => 0.63,
|
|
|
6247 |
'microstep' => 4.00,
|
|
|
6248 |
'miranda' => 2.00,
|
|
|
6249 |
'model 204' => 2.11,
|
|
|
6250 |
'modula 2' => 1.00,
|
|
|
6251 |
'mosaic' => 13.33,
|
|
|
6252 |
# 'ms c ++ v. 7' => 1.51,
|
|
|
6253 |
'ms compiled basic' => 0.88,
|
|
|
6254 |
'msl' => 1.25,
|
|
|
6255 |
'mulisp' => 1.25,
|
|
|
6256 |
'MUMPS' => 4.21,
|
|
|
6257 |
'Mustache' => 1.75,
|
|
|
6258 |
'Nastran' => 1.13,
|
|
|
6259 |
'natural' => 1.54,
|
|
|
6260 |
'natural 1' => 1.51,
|
|
|
6261 |
'natural 2' => 1.74,
|
|
|
6262 |
'natural construct' => 3.20,
|
|
|
6263 |
'natural language' => 0.03,
|
|
|
6264 |
'netron/cap' => 4.21,
|
|
|
6265 |
'nexpert' => 1.63,
|
|
|
6266 |
'nial' => 1.63,
|
|
|
6267 |
'nomad2' => 2.00,
|
|
|
6268 |
'nonprocedural default' => 2.22,
|
|
|
6269 |
'notes vip' => 2.22,
|
|
|
6270 |
'nroff' => 1.51,
|
|
|
6271 |
'object assembler' => 1.25,
|
|
|
6272 |
'object lisp' => 2.76,
|
|
|
6273 |
'object logo' => 2.76,
|
|
|
6274 |
'object pascal' => 2.76,
|
|
|
6275 |
'object star' => 5.00,
|
|
|
6276 |
'Objective C' => 2.96,
|
|
|
6277 |
'Objective C++' => 2.96,
|
|
|
6278 |
'objectoriented default' => 2.76,
|
|
|
6279 |
'objectview' => 3.20,
|
|
|
6280 |
'OCaml' => 3.00,
|
|
|
6281 |
'ogl' => 1.00,
|
|
|
6282 |
'omnis 7' => 2.00,
|
|
|
6283 |
'oodl' => 2.76,
|
|
|
6284 |
'ops' => 1.74,
|
|
|
6285 |
'ops5' => 1.38,
|
|
|
6286 |
'oracle' => 2.76,
|
|
|
6287 |
'Oracle Reports' => 2.76,
|
|
|
6288 |
'Oracle Forms' => 2.67,
|
|
|
6289 |
'Oracle Developer/2000' => 3.48,
|
|
|
6290 |
'oscar' => 0.75,
|
|
|
6291 |
'pacbase' => 1.67,
|
|
|
6292 |
'pace' => 2.00,
|
|
|
6293 |
'paradox/pal' => 2.22,
|
|
|
6294 |
'Pascal' => 0.88,
|
|
|
6295 |
'Patran Command Language' => 2.50,
|
|
|
6296 |
'pc focus' => 2.22,
|
|
|
6297 |
'pdl millenium' => 3.81,
|
|
|
6298 |
'pdp11 ade' => 1.51,
|
|
|
6299 |
'peoplesoft' => 2.50,
|
|
|
6300 |
'Perl' => 4.00,
|
|
|
6301 |
'persistance object builder' => 3.81,
|
|
|
6302 |
'Pig Latin' => 1.00,
|
|
|
6303 |
'pilot' => 1.51,
|
|
|
6304 |
'PL/I' => 1.38,
|
|
|
6305 |
'pl/1' => 1.38,
|
|
|
6306 |
'pl/m' => 1.13,
|
|
|
6307 |
'pl/s' => 0.88,
|
|
|
6308 |
'pl/sql' => 2.58,
|
|
|
6309 |
'planit' => 1.51,
|
|
|
6310 |
'planner' => 1.25,
|
|
|
6311 |
'planperfect 1' => 11.43,
|
|
|
6312 |
'plato' => 1.51,
|
|
|
6313 |
'polyforth' => 1.25,
|
|
|
6314 |
'pop' => 1.38,
|
|
|
6315 |
'poplog' => 1.38,
|
|
|
6316 |
'power basic' => 1.63,
|
|
|
6317 |
'powerbuilder' => 3.33,
|
|
|
6318 |
'powerhouse' => 5.71,
|
|
|
6319 |
'PowerShell' => 3.00,
|
|
|
6320 |
'ppl (plus)' => 2.00,
|
|
|
6321 |
'problemoriented default' => 1.13,
|
|
|
6322 |
'proc' => 2.96,
|
|
|
6323 |
'procedural default' => 0.75,
|
|
|
6324 |
'professional pascal' => 0.88,
|
|
|
6325 |
'program generator default' => 5.00,
|
|
|
6326 |
'progress v4' => 2.22,
|
|
|
6327 |
'proiv' => 1.38,
|
|
|
6328 |
'Prolog' => 1.25,
|
|
|
6329 |
'prose' => 0.75,
|
|
|
6330 |
'proteus' => 0.75,
|
|
|
6331 |
'Protocol Buffers' => 2.00,
|
|
|
6332 |
'Puppet' => 2.00,
|
|
|
6333 |
'PureScript' => 2.00,
|
|
|
6334 |
'qbasic' => 1.38,
|
|
|
6335 |
'qbe' => 6.15,
|
|
|
6336 |
'qmf' => 5.33,
|
|
|
6337 |
'QML' => 1.25,
|
|
|
6338 |
'Qt Project' => 1.00,
|
|
|
6339 |
'qnial' => 1.63,
|
|
|
6340 |
'quattro' => 13.33,
|
|
|
6341 |
'quattro pro' => 13.33,
|
|
|
6342 |
'query default' => 6.15,
|
|
|
6343 |
'quick basic 1' => 1.25,
|
|
|
6344 |
'quick basic 2' => 1.31,
|
|
|
6345 |
'quick basic 3' => 1.38,
|
|
|
6346 |
'quick c' => 0.63,
|
|
|
6347 |
'quickbuild' => 2.86,
|
|
|
6348 |
'quiz' => 5.33,
|
|
|
6349 |
'R' => 3.00,
|
|
|
6350 |
'Racket' => 1.50,
|
|
|
6351 |
'rally' => 2.00,
|
|
|
6352 |
'ramis ii' => 2.00,
|
|
|
6353 |
'rapidgen' => 2.86,
|
|
|
6354 |
'ratfor' => 0.88,
|
|
|
6355 |
'rdb' => 2.00,
|
|
|
6356 |
'realia' => 1.74,
|
|
|
6357 |
'realizer 1.0' => 2.00,
|
|
|
6358 |
'realizer 2.0' => 2.22,
|
|
|
6359 |
'relate/3000' => 2.00,
|
|
|
6360 |
'reuse default' => 16.00,
|
|
|
6361 |
'Razor' => 2.00,
|
|
|
6362 |
'Rexx' => 1.19,
|
|
|
6363 |
'rm basic' => 0.88,
|
|
|
6364 |
'rm cobol' => 0.75,
|
|
|
6365 |
'rm fortran' => 0.75,
|
|
|
6366 |
'RobotFramework' => 2.50,
|
|
|
6367 |
'rpg i' => 1.00,
|
|
|
6368 |
'rpg ii' => 1.63,
|
|
|
6369 |
'rpg iii' => 1.63,
|
|
|
6370 |
'rtexpert 1.4' => 1.38,
|
|
|
6371 |
'Rust' => 1.00,
|
|
|
6372 |
'sabretalk' => 0.90,
|
|
|
6373 |
'sail' => 0.75,
|
|
|
6374 |
'sapiens' => 5.00,
|
|
|
6375 |
'sas' => 1.95,
|
|
|
6376 |
'savvy' => 6.15,
|
|
|
6377 |
'sbasic' => 0.88,
|
|
|
6378 |
'Scala' => 4.10,
|
|
|
6379 |
'sceptre' => 1.13,
|
|
|
6380 |
'scheme' => 1.51,
|
|
|
6381 |
'screen painter default' => 13.33,
|
|
|
6382 |
'sequal' => 6.67,
|
|
|
6383 |
'Bourne Shell' => 3.81,
|
|
|
6384 |
'Bourne Again Shell' => 3.81,
|
|
|
6385 |
'ksh' => 3.81,
|
|
|
6386 |
'C Shell' => 3.81,
|
|
|
6387 |
'siebel tools ' => 6.15,
|
|
|
6388 |
'SAS' => 1.5 ,
|
|
|
6389 |
'SASS' => 1.5 ,
|
|
|
6390 |
'simplan' => 2.22,
|
|
|
6391 |
'simscript' => 1.74,
|
|
|
6392 |
'simula' => 1.74,
|
|
|
6393 |
'simula 67' => 1.74,
|
|
|
6394 |
'simulation default' => 1.74,
|
|
|
6395 |
'SKILL' => 2.00,
|
|
|
6396 |
'SKILL++' => 2.00,
|
|
|
6397 |
'slogan' => 0.98,
|
|
|
6398 |
'smalltalk' => 2.50,
|
|
|
6399 |
'smalltalk 286' => 3.81,
|
|
|
6400 |
'smalltalk 80' => 3.81,
|
|
|
6401 |
'smalltalk/v' => 3.81,
|
|
|
6402 |
'Smarty' => 3.50,
|
|
|
6403 |
'snap' => 1.00,
|
|
|
6404 |
'snobol24' => 0.63,
|
|
|
6405 |
'softscreen' => 5.71,
|
|
|
6406 |
'Softbridge Basic' => 2.76,
|
|
|
6407 |
'solo' => 1.38,
|
|
|
6408 |
'speakeasy' => 2.22,
|
|
|
6409 |
'spinnaker ppl' => 2.22,
|
|
|
6410 |
'splus' => 2.50,
|
|
|
6411 |
'spreadsheet default' => 13.33,
|
|
|
6412 |
'sps' => 0.25,
|
|
|
6413 |
'spss' => 2.50,
|
|
|
6414 |
'SQL' => 2.29,
|
|
|
6415 |
'sqlwindows' => 6.67,
|
|
|
6416 |
'statistical default' => 2.50,
|
|
|
6417 |
'Standard ML' => 3.00,
|
|
|
6418 |
'strategem' => 2.22,
|
|
|
6419 |
'stress' => 1.13,
|
|
|
6420 |
'strongly typed default' => 0.88,
|
|
|
6421 |
'style' => 1.74,
|
|
|
6422 |
'superbase 1.3' => 2.22,
|
|
|
6423 |
'surpass' => 13.33,
|
|
|
6424 |
'Swift' => 2.50,
|
|
|
6425 |
'sybase' => 2.00,
|
|
|
6426 |
'symantec c++' => 2.76,
|
|
|
6427 |
'symbolang' => 1.25,
|
|
|
6428 |
'synchroworks' => 4.44,
|
|
|
6429 |
'synon/2e' => 4.21,
|
|
|
6430 |
'systemw' => 2.22,
|
|
|
6431 |
'tandem access language' => 0.88,
|
|
|
6432 |
'Tcl/Tk' => 4.00,
|
|
|
6433 |
'Teamcenter def' => 1.00,
|
|
|
6434 |
'Teamcenter met' => 1.00,
|
|
|
6435 |
'Teamcenter mth' => 1.00,
|
|
|
6436 |
'telon' => 5.00,
|
|
|
6437 |
'tessaract' => 2.00,
|
|
|
6438 |
'the twin' => 13.33,
|
|
|
6439 |
'themis' => 6.15,
|
|
|
6440 |
'tiief' => 5.71,
|
|
|
6441 |
'Titanium Style Sheet' => 2.00,
|
|
|
6442 |
'topspeed c++' => 2.76,
|
|
|
6443 |
'transform' => 5.33,
|
|
|
6444 |
'translisp plus' => 1.43,
|
|
|
6445 |
'treet' => 1.25,
|
|
|
6446 |
'treetran' => 1.25,
|
|
|
6447 |
'trs80 basic' => 0.63,
|
|
|
6448 |
'true basic' => 1.25,
|
|
|
6449 |
'turbo c' => 0.63,
|
|
|
6450 |
'turbo expert' => 1.63,
|
|
|
6451 |
'turbo pascal >5' => 1.63,
|
|
|
6452 |
'turbo pascal 14' => 1.00,
|
|
|
6453 |
'turbo pascal 45' => 1.13,
|
|
|
6454 |
'turing' => 1.00,
|
|
|
6455 |
'tutor' => 1.51,
|
|
|
6456 |
'twaice' => 1.63,
|
|
|
6457 |
'TypeScript' => 2.00,
|
|
|
6458 |
'ucsd pascal' => 0.88,
|
|
|
6459 |
'ufo/ims' => 2.22,
|
|
|
6460 |
'uhelp' => 2.50,
|
|
|
6461 |
'uniface' => 5.00,
|
|
|
6462 |
'Unity-Prefab' => 2.50,
|
|
|
6463 |
'Vala' => 1.50,
|
|
|
6464 |
'Vala Header' => 1.40,
|
|
|
6465 |
'vax acms' => 1.38,
|
|
|
6466 |
'vax ade' => 2.00,
|
|
|
6467 |
'vbscript' => 2.35,
|
|
|
6468 |
'vectran' => 0.75,
|
|
|
6469 |
'Verilog-SystemVerilog' => 1.51,
|
|
|
6470 |
'VHDL' => 4.21,
|
|
|
6471 |
'vim script' => 3.00,
|
|
|
6472 |
'visible c' => 1.63,
|
|
|
6473 |
'visible cobol' => 2.00,
|
|
|
6474 |
'visicalc 1' => 8.89,
|
|
|
6475 |
'visual 4.0' => 2.76,
|
|
|
6476 |
'visual basic' => 1.90,
|
|
|
6477 |
'visual basic 1' => 1.74,
|
|
|
6478 |
'visual basic 2' => 1.86,
|
|
|
6479 |
'visual basic 3' => 2.00,
|
|
|
6480 |
'visual basic 4' => 2.22,
|
|
|
6481 |
'visual basic 5' => 2.76,
|
|
|
6482 |
'Visual Basic' => 2.76,
|
|
|
6483 |
'visual basic dos' => 2.00,
|
|
|
6484 |
'visual c++' => 2.35,
|
|
|
6485 |
'visual cobol' => 4.00,
|
|
|
6486 |
'Visual Fox Pro' => 4.00, # Visual Fox Pro is not available in the language gearing ratios listed at Mayes Consulting web site
|
|
|
6487 |
'visual objects' => 5.00,
|
|
|
6488 |
'visualage' => 3.81,
|
|
|
6489 |
'Visualforce Component' => 1.9 ,
|
|
|
6490 |
'Visualforce Page' => 1.9 ,
|
|
|
6491 |
'visualgen' => 4.44,
|
|
|
6492 |
'VM' => 2.00,
|
|
|
6493 |
'vpf' => 0.84,
|
|
|
6494 |
'vulcan' => 1.25,
|
|
|
6495 |
'vz programmer' => 2.22,
|
|
|
6496 |
'warp x' => 2.00,
|
|
|
6497 |
'watcom c' => 0.63,
|
|
|
6498 |
'watcom c/386' => 0.63,
|
|
|
6499 |
'waterloo c' => 0.63,
|
|
|
6500 |
'waterloo pascal' => 0.88,
|
|
|
6501 |
'watfiv' => 0.94,
|
|
|
6502 |
'watfor' => 0.88,
|
|
|
6503 |
'web scripts' => 5.33,
|
|
|
6504 |
'whip' => 0.88,
|
|
|
6505 |
'Windows Message File' => 1.00,
|
|
|
6506 |
'Windows Resource File' => 1.00,
|
|
|
6507 |
'Windows Module Definition' => 1.00,
|
|
|
6508 |
'WiX source' => 1.90,
|
|
|
6509 |
'WiX include' => 1.90,
|
|
|
6510 |
'WiX string localization' => 1.90,
|
|
|
6511 |
'wizard' => 2.86,
|
|
|
6512 |
'xBase' => 2.00,
|
|
|
6513 |
'xBase Header' => 2.00,
|
|
|
6514 |
'xlisp' => 1.25,
|
|
|
6515 |
'XAML' => 1.90,
|
|
|
6516 |
'XQuery' => 2.50,
|
|
|
6517 |
'yacc' => 1.51,
|
|
|
6518 |
'yacc++' => 1.51,
|
|
|
6519 |
'YAML' => 0.90,
|
|
|
6520 |
'zbasic' => 0.88,
|
|
|
6521 |
'zim' => 4.21,
|
|
|
6522 |
'zlisp' => 1.25,
|
|
|
6523 |
'Expect' => 2.00,
|
|
|
6524 |
'C/C++ Header' => 1.00,
|
|
|
6525 |
'inc' => 1.00,
|
|
|
6526 |
'lex' => 1.00,
|
|
|
6527 |
'Julia' => 4.00,
|
|
|
6528 |
'MATLAB' => 4.00,
|
|
|
6529 |
'Mercury' => 3.00,
|
|
|
6530 |
'Maven/XML' => 2.5,
|
|
|
6531 |
'IDL' => 3.80,
|
|
|
6532 |
'Octave' => 4.00,
|
|
|
6533 |
'ML' => 3.00,
|
|
|
6534 |
'Modula3' => 2.00,
|
|
|
6535 |
'PHP' => 3.50,
|
|
|
6536 |
'Python' => 4.20,
|
|
|
6537 |
'Cython' => 3.80,
|
|
|
6538 |
'Ruby' => 4.20,
|
|
|
6539 |
'Ruby HTML' => 4.00,
|
|
|
6540 |
'sed' => 4.00,
|
|
|
6541 |
'Lua' => 4.00,
|
|
|
6542 |
'OpenCL' => 1.50,
|
|
|
6543 |
# 'Lisp/Julia' => 4.00,
|
|
|
6544 |
# 'Lisp/OpenCL' => 1.50,
|
|
|
6545 |
# 'MATLAB/Objective C/MUMPS/Mercury' => 3.00,
|
|
|
6546 |
);
|
|
|
6547 |
# 1}}}
|
|
|
6548 |
%{$rh_Known_Binary_Archives} = ( # {{{1
|
|
|
6549 |
'.tar' => 1 ,
|
|
|
6550 |
'.tar.Z' => 1 ,
|
|
|
6551 |
'.tar.gz' => 1 ,
|
|
|
6552 |
'.tar.bz2' => 1 ,
|
|
|
6553 |
'.zip' => 1 ,
|
|
|
6554 |
'.Zip' => 1 ,
|
|
|
6555 |
'.ZIP' => 1 ,
|
|
|
6556 |
'.ear' => 1 , # Java
|
|
|
6557 |
'.war' => 1 , # contained within .ear
|
|
|
6558 |
'.xz' => 1 ,
|
|
|
6559 |
);
|
|
|
6560 |
# 1}}}
|
|
|
6561 |
} # end sub set_constants()
|
|
|
6562 |
sub check_scale_existence { # {{{1
|
|
|
6563 |
# do a few sanity checks
|
|
|
6564 |
my ($rhaa_Filters_by_Language,
|
|
|
6565 |
$rh_Language_by_Extension,
|
|
|
6566 |
$rh_Scale_Factor) = @_;
|
|
|
6567 |
|
|
|
6568 |
my %extension_collisions = (
|
|
|
6569 |
# TODO: find a better way of dealing with these
|
|
|
6570 |
"PHP/Pascal" => 1,
|
|
|
6571 |
"Lisp/OpenCL" => 1,
|
|
|
6572 |
"Lisp/Julia" => 1,
|
|
|
6573 |
"MATLAB/Objective C/MUMPS/Mercury" => 1,
|
|
|
6574 |
"Pascal/Puppet" => 1,
|
|
|
6575 |
"Perl/Prolog" => 1,
|
|
|
6576 |
"IDL/Qt Project/Prolog" => 1,
|
|
|
6577 |
"D/dtrace" => 1,
|
|
|
6578 |
);
|
|
|
6579 |
my $OK = 1;
|
|
|
6580 |
foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
|
|
|
6581 |
next if defined $extension_collisions{$language};
|
|
|
6582 |
if (!defined $rh_Scale_Factor->{$language}) {
|
|
|
6583 |
$OK = 0;
|
|
|
6584 |
warn "Missing scale factor for $language\n";
|
|
|
6585 |
}
|
|
|
6586 |
}
|
|
|
6587 |
|
|
|
6588 |
my %seen_it = ();
|
|
|
6589 |
foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
|
|
|
6590 |
my $language = $rh_Language_by_Extension->{$ext};
|
|
|
6591 |
next if defined $extension_collisions{$language};
|
|
|
6592 |
next if $seen_it{$language};
|
|
|
6593 |
if (!@{$rhaa_Filters_by_Language->{$language}}) {
|
|
|
6594 |
$OK = 0;
|
|
|
6595 |
warn "Missing language filter for $language\n";
|
|
|
6596 |
}
|
|
|
6597 |
$seen_it{$language} = 1;
|
|
|
6598 |
}
|
|
|
6599 |
die unless $OK;
|
|
|
6600 |
} # 1}}}
|
|
|
6601 |
sub Install_Regexp_Common { # {{{1
|
|
|
6602 |
# Installs portions of Damian Conway's & Abigail's Regexp::Common
|
|
|
6603 |
# module, v2.120, into a temporary directory for the duration of
|
|
|
6604 |
# this run.
|
|
|
6605 |
|
|
|
6606 |
my %Regexp_Common_Contents = ();
|
|
|
6607 |
$Regexp_Common_Contents{'Common'} = <<'EOCommon'; # {{{2
|
|
|
6608 |
package Regexp::Common;
|
|
|
6609 |
|
|
|
6610 |
use 5.00473;
|
|
|
6611 |
use strict;
|
|
|
6612 |
|
|
|
6613 |
local $^W = 1;
|
|
|
6614 |
|
|
|
6615 |
use vars qw /$VERSION %RE %sub_interface $AUTOLOAD/;
|
|
|
6616 |
|
|
|
6617 |
($VERSION) = q $Revision: 2.120 $ =~ /([\d.]+)/;
|
|
|
6618 |
|
|
|
6619 |
|
|
|
6620 |
sub _croak {
|
|
|
6621 |
require Carp;
|
|
|
6622 |
goto &Carp::croak;
|
|
|
6623 |
}
|
|
|
6624 |
|
|
|
6625 |
sub _carp {
|
|
|
6626 |
require Carp;
|
|
|
6627 |
goto &Carp::carp;
|
|
|
6628 |
}
|
|
|
6629 |
|
|
|
6630 |
sub new {
|
|
|
6631 |
my ($class, @data) = @_;
|
|
|
6632 |
my %self;
|
|
|
6633 |
tie %self, $class, @data;
|
|
|
6634 |
return \%self;
|
|
|
6635 |
}
|
|
|
6636 |
|
|
|
6637 |
sub TIEHASH {
|
|
|
6638 |
my ($class, @data) = @_;
|
|
|
6639 |
bless \@data, $class;
|
|
|
6640 |
}
|
|
|
6641 |
|
|
|
6642 |
sub FETCH {
|
|
|
6643 |
my ($self, $extra) = @_;
|
|
|
6644 |
return bless ref($self)->new(@$self, $extra), ref($self);
|
|
|
6645 |
}
|
|
|
6646 |
|
|
|
6647 |
# Modification for cloc: only need a few modules from Regexp::Common.
|
|
|
6648 |
my %imports = map {$_ => "Regexp::Common::$_"}
|
|
|
6649 |
qw /balanced comment delimited /;
|
|
|
6650 |
#my %imports = map {$_ => "Regexp::Common::$_"}
|
|
|
6651 |
# qw /balanced CC comment delimited lingua list
|
|
|
6652 |
# net number profanity SEN URI whitespace
|
|
|
6653 |
# zip/;
|
|
|
6654 |
|
|
|
6655 |
sub import {
|
|
|
6656 |
shift; # Shift off the class.
|
|
|
6657 |
tie %RE, __PACKAGE__;
|
|
|
6658 |
{
|
|
|
6659 |
no strict 'refs';
|
|
|
6660 |
*{caller() . "::RE"} = \%RE;
|
|
|
6661 |
}
|
|
|
6662 |
|
|
|
6663 |
my $saw_import;
|
|
|
6664 |
my $no_defaults;
|
|
|
6665 |
my %exclude;
|
|
|
6666 |
foreach my $entry (grep {!/^RE_/} @_) {
|
|
|
6667 |
if ($entry eq 'pattern') {
|
|
|
6668 |
no strict 'refs';
|
|
|
6669 |
*{caller() . "::pattern"} = \&pattern;
|
|
|
6670 |
next;
|
|
|
6671 |
}
|
|
|
6672 |
# This used to prevent $; from being set. We still recognize it,
|
|
|
6673 |
# but we won't do anything.
|
|
|
6674 |
if ($entry eq 'clean') {
|
|
|
6675 |
next;
|
|
|
6676 |
}
|
|
|
6677 |
if ($entry eq 'no_defaults') {
|
|
|
6678 |
$no_defaults ++;
|
|
|
6679 |
next;
|
|
|
6680 |
}
|
|
|
6681 |
if (my $module = $imports {$entry}) {
|
|
|
6682 |
$saw_import ++;
|
|
|
6683 |
eval "require $module;";
|
|
|
6684 |
die $@ if $@;
|
|
|
6685 |
next;
|
|
|
6686 |
}
|
|
|
6687 |
if ($entry =~ /^!(.*)/ && $imports {$1}) {
|
|
|
6688 |
$exclude {$1} ++;
|
|
|
6689 |
next;
|
|
|
6690 |
}
|
|
|
6691 |
# As a last resort, try to load the argument.
|
|
|
6692 |
my $module = $entry =~ /^Regexp::Common/
|
|
|
6693 |
? $entry
|
|
|
6694 |
: "Regexp::Common::" . $entry;
|
|
|
6695 |
eval "require $module;";
|
|
|
6696 |
die $@ if $@;
|
|
|
6697 |
}
|
|
|
6698 |
|
|
|
6699 |
unless ($saw_import || $no_defaults) {
|
|
|
6700 |
foreach my $module (values %imports) {
|
|
|
6701 |
next if $exclude {$module};
|
|
|
6702 |
eval "require $module;";
|
|
|
6703 |
die $@ if $@;
|
|
|
6704 |
}
|
|
|
6705 |
}
|
|
|
6706 |
|
|
|
6707 |
my %exported;
|
|
|
6708 |
foreach my $entry (grep {/^RE_/} @_) {
|
|
|
6709 |
if ($entry =~ /^RE_(\w+_)?ALL$/) {
|
|
|
6710 |
my $m = defined $1 ? $1 : "";
|
|
|
6711 |
my $re = qr /^RE_${m}.*$/;
|
|
|
6712 |
while (my ($sub, $interface) = each %sub_interface) {
|
|
|
6713 |
next if $exported {$sub};
|
|
|
6714 |
next unless $sub =~ /$re/;
|
|
|
6715 |
{
|
|
|
6716 |
no strict 'refs';
|
|
|
6717 |
*{caller() . "::$sub"} = $interface;
|
|
|
6718 |
}
|
|
|
6719 |
$exported {$sub} ++;
|
|
|
6720 |
}
|
|
|
6721 |
}
|
|
|
6722 |
else {
|
|
|
6723 |
next if $exported {$entry};
|
|
|
6724 |
_croak "Can't export unknown subroutine &$entry"
|
|
|
6725 |
unless $sub_interface {$entry};
|
|
|
6726 |
{
|
|
|
6727 |
no strict 'refs';
|
|
|
6728 |
*{caller() . "::$entry"} = $sub_interface {$entry};
|
|
|
6729 |
}
|
|
|
6730 |
$exported {$entry} ++;
|
|
|
6731 |
}
|
|
|
6732 |
}
|
|
|
6733 |
}
|
|
|
6734 |
|
|
|
6735 |
sub AUTOLOAD { _croak "Can't $AUTOLOAD" }
|
|
|
6736 |
|
|
|
6737 |
sub DESTROY {}
|
|
|
6738 |
|
|
|
6739 |
my %cache;
|
|
|
6740 |
|
|
|
6741 |
my $fpat = qr/^(-\w+)/;
|
|
|
6742 |
|
|
|
6743 |
sub _decache {
|
|
|
6744 |
my @args = @{tied %{$_[0]}};
|
|
|
6745 |
my @nonflags = grep {!/$fpat/} @args;
|
|
|
6746 |
my $cache = get_cache(@nonflags);
|
|
|
6747 |
_croak "Can't create unknown regex: \$RE{"
|
|
|
6748 |
. join("}{",@args) . "}"
|
|
|
6749 |
unless exists $cache->{__VAL__};
|
|
|
6750 |
_croak "Perl $] does not support the pattern "
|
|
|
6751 |
. "\$RE{" . join("}{",@args)
|
|
|
6752 |
. "}.\nYou need Perl $cache->{__VAL__}{version} or later"
|
|
|
6753 |
unless ($cache->{__VAL__}{version}||0) <= $];
|
|
|
6754 |
my %flags = ( %{$cache->{__VAL__}{default}},
|
|
|
6755 |
map { /$fpat\Q$;\E(.*)/ ? ($1 => $2)
|
|
|
6756 |
: /$fpat/ ? ($1 => undef)
|
|
|
6757 |
: ()
|
|
|
6758 |
} @args);
|
|
|
6759 |
$cache->{__VAL__}->_clone_with(\@args, \%flags);
|
|
|
6760 |
}
|
|
|
6761 |
|
|
|
6762 |
use overload q{""} => \&_decache;
|
|
|
6763 |
|
|
|
6764 |
|
|
|
6765 |
sub get_cache {
|
|
|
6766 |
my $cache = \%cache;
|
|
|
6767 |
foreach (@_) {
|
|
|
6768 |
$cache = $cache->{$_}
|
|
|
6769 |
|| ($cache->{$_} = {});
|
|
|
6770 |
}
|
|
|
6771 |
return $cache;
|
|
|
6772 |
}
|
|
|
6773 |
|
|
|
6774 |
sub croak_version {
|
|
|
6775 |
my ($entry, @args) = @_;
|
|
|
6776 |
}
|
|
|
6777 |
|
|
|
6778 |
sub pattern {
|
|
|
6779 |
my %spec = @_;
|
|
|
6780 |
_croak 'pattern() requires argument: name => [ @list ]'
|
|
|
6781 |
unless $spec{name} && ref $spec{name} eq 'ARRAY';
|
|
|
6782 |
_croak 'pattern() requires argument: create => $sub_ref_or_string'
|
|
|
6783 |
unless $spec{create};
|
|
|
6784 |
|
|
|
6785 |
if (ref $spec{create} ne "CODE") {
|
|
|
6786 |
my $fixed_str = "$spec{create}";
|
|
|
6787 |
$spec{create} = sub { $fixed_str }
|
|
|
6788 |
}
|
|
|
6789 |
|
|
|
6790 |
my @nonflags;
|
|
|
6791 |
my %default;
|
|
|
6792 |
foreach ( @{$spec{name}} ) {
|
|
|
6793 |
if (/$fpat=(.*)/) {
|
|
|
6794 |
$default{$1} = $2;
|
|
|
6795 |
}
|
|
|
6796 |
elsif (/$fpat\s*$/) {
|
|
|
6797 |
$default{$1} = undef;
|
|
|
6798 |
}
|
|
|
6799 |
else {
|
|
|
6800 |
push @nonflags, $_;
|
|
|
6801 |
}
|
|
|
6802 |
}
|
|
|
6803 |
|
|
|
6804 |
my $entry = get_cache(@nonflags);
|
|
|
6805 |
|
|
|
6806 |
if ($entry->{__VAL__}) {
|
|
|
6807 |
_carp "Overriding \$RE{"
|
|
|
6808 |
. join("}{",@nonflags)
|
|
|
6809 |
. "}";
|
|
|
6810 |
}
|
|
|
6811 |
|
|
|
6812 |
$entry->{__VAL__} = bless {
|
|
|
6813 |
create => $spec{create},
|
|
|
6814 |
match => $spec{match} || \&generic_match,
|
|
|
6815 |
subs => $spec{subs} || \&generic_subs,
|
|
|
6816 |
version => $spec{version},
|
|
|
6817 |
default => \%default,
|
|
|
6818 |
}, 'Regexp::Common::Entry';
|
|
|
6819 |
|
|
|
6820 |
foreach (@nonflags) {s/\W/X/g}
|
|
|
6821 |
my $subname = "RE_" . join ("_", @nonflags);
|
|
|
6822 |
$sub_interface{$subname} = sub {
|
|
|
6823 |
push @_ => undef if @_ % 2;
|
|
|
6824 |
my %flags = @_;
|
|
|
6825 |
my $pat = $spec{create}->($entry->{__VAL__},
|
|
|
6826 |
{%default, %flags}, \@nonflags);
|
|
|
6827 |
if (exists $flags{-keep}) { $pat =~ s/\Q(?k:/(/g; }
|
|
|
6828 |
else { $pat =~ s/\Q(?k:/(?:/g; }
|
|
|
6829 |
return exists $flags {-i} ? qr /(?i:$pat)/ : qr/$pat/;
|
|
|
6830 |
};
|
|
|
6831 |
|
|
|
6832 |
return 1;
|
|
|
6833 |
}
|
|
|
6834 |
|
|
|
6835 |
sub generic_match {$_ [1] =~ /$_[0]/}
|
|
|
6836 |
sub generic_subs {$_ [1] =~ s/$_[0]/$_[2]/}
|
|
|
6837 |
|
|
|
6838 |
sub matches {
|
|
|
6839 |
my ($self, $str) = @_;
|
|
|
6840 |
my $entry = $self -> _decache;
|
|
|
6841 |
$entry -> {match} -> ($entry, $str);
|
|
|
6842 |
}
|
|
|
6843 |
|
|
|
6844 |
sub subs {
|
|
|
6845 |
my ($self, $str, $newstr) = @_;
|
|
|
6846 |
my $entry = $self -> _decache;
|
|
|
6847 |
$entry -> {subs} -> ($entry, $str, $newstr);
|
|
|
6848 |
return $str;
|
|
|
6849 |
}
|
|
|
6850 |
|
|
|
6851 |
|
|
|
6852 |
package Regexp::Common::Entry;
|
|
|
6853 |
# use Carp;
|
|
|
6854 |
|
|
|
6855 |
local $^W = 1;
|
|
|
6856 |
|
|
|
6857 |
use overload
|
|
|
6858 |
q{""} => sub {
|
|
|
6859 |
my ($self) = @_;
|
|
|
6860 |
my $pat = $self->{create}->($self, $self->{flags}, $self->{args});
|
|
|
6861 |
if (exists $self->{flags}{-keep}) {
|
|
|
6862 |
$pat =~ s/\Q(?k:/(/g;
|
|
|
6863 |
}
|
|
|
6864 |
else {
|
|
|
6865 |
$pat =~ s/\Q(?k:/(?:/g;
|
|
|
6866 |
}
|
|
|
6867 |
if (exists $self->{flags}{-i}) { $pat = "(?i)$pat" }
|
|
|
6868 |
return $pat;
|
|
|
6869 |
};
|
|
|
6870 |
|
|
|
6871 |
sub _clone_with {
|
|
|
6872 |
my ($self, $args, $flags) = @_;
|
|
|
6873 |
bless { %$self, args=>$args, flags=>$flags }, ref $self;
|
|
|
6874 |
}
|
|
|
6875 |
#
|
|
|
6876 |
# Copyright (c) 2001 - 2005, Damian Conway and Abigail. All Rights
|
|
|
6877 |
# Reserved. This module is free software. It may be used, redistributed
|
|
|
6878 |
# and/or modified under the terms of the Perl Artistic License
|
|
|
6879 |
# (see http://www.perl.com/perl/misc/Artistic.html)
|
|
|
6880 |
EOCommon
|
|
|
6881 |
# 2}}}
|
|
|
6882 |
$Regexp_Common_Contents{'Common/comment'} = <<'EOC'; # {{{2
|
|
|
6883 |
# $Id: comment.pm,v 2.116 2005/03/16 00:00:02 abigail Exp $
|
|
|
6884 |
|
|
|
6885 |
package Regexp::Common::comment;
|
|
|
6886 |
|
|
|
6887 |
use strict;
|
|
|
6888 |
local $^W = 1;
|
|
|
6889 |
|
|
|
6890 |
use Regexp::Common qw /pattern clean no_defaults/;
|
|
|
6891 |
use vars qw /$VERSION/;
|
|
|
6892 |
|
|
|
6893 |
($VERSION) = q $Revision: 2.116 $ =~ /[\d.]+/g;
|
|
|
6894 |
|
|
|
6895 |
my @generic = (
|
|
|
6896 |
{languages => [qw /ABC Forth/],
|
|
|
6897 |
to_eol => ['\\\\']}, # This is for just a *single* backslash.
|
|
|
6898 |
|
|
|
6899 |
{languages => [qw /Ada Alan Eiffel lua/],
|
|
|
6900 |
to_eol => ['--']},
|
|
|
6901 |
|
|
|
6902 |
{languages => [qw /Advisor/],
|
|
|
6903 |
to_eol => ['#|//']},
|
|
|
6904 |
|
|
|
6905 |
{languages => [qw /Advsys CQL Lisp LOGO M MUMPS REBOL Scheme
|
|
|
6906 |
SMITH zonefile/],
|
|
|
6907 |
to_eol => [';']},
|
|
|
6908 |
|
|
|
6909 |
{languages => ['Algol 60'],
|
|
|
6910 |
from_to => [[qw /comment ;/]]},
|
|
|
6911 |
|
|
|
6912 |
{languages => [qw {ALPACA B C C-- LPC PL/I}],
|
|
|
6913 |
from_to => [[qw {/* */}]]},
|
|
|
6914 |
|
|
|
6915 |
{languages => [qw /awk fvwm2 Icon mutt Perl Python QML R Ruby shell Tcl/],
|
|
|
6916 |
to_eol => ['#']},
|
|
|
6917 |
|
|
|
6918 |
{languages => [[BASIC => 'mvEnterprise']],
|
|
|
6919 |
to_eol => ['[*!]|REM']},
|
|
|
6920 |
|
|
|
6921 |
{languages => [qw /Befunge-98 Funge-98 Shelta/],
|
|
|
6922 |
id => [';']},
|
|
|
6923 |
|
|
|
6924 |
{languages => ['beta-Juliet', 'Crystal Report', 'Portia'],
|
|
|
6925 |
to_eol => ['//']},
|
|
|
6926 |
|
|
|
6927 |
{languages => ['BML'],
|
|
|
6928 |
from_to => [['<?_c', '_c?>']],
|
|
|
6929 |
},
|
|
|
6930 |
|
|
|
6931 |
{languages => [qw /C++/, 'C#', qw /Cg ECMAScript FPL Java JavaScript/],
|
|
|
6932 |
to_eol => ['//'],
|
|
|
6933 |
from_to => [[qw {/* */}]]},
|
|
|
6934 |
|
|
|
6935 |
{languages => [qw /CLU LaTeX slrn TeX/],
|
|
|
6936 |
to_eol => ['%']},
|
|
|
6937 |
|
|
|
6938 |
{languages => [qw /False/],
|
|
|
6939 |
from_to => [[qw !{ }!]]},
|
|
|
6940 |
|
|
|
6941 |
{languages => [qw /Fortran/],
|
|
|
6942 |
to_eol => ['!']},
|
|
|
6943 |
|
|
|
6944 |
{languages => [qw /Haifu/],
|
|
|
6945 |
id => [',']},
|
|
|
6946 |
|
|
|
6947 |
{languages => [qw /ILLGOL/],
|
|
|
6948 |
to_eol => ['NB']},
|
|
|
6949 |
|
|
|
6950 |
{languages => [qw /INTERCAL/],
|
|
|
6951 |
to_eol => [q{(?:(?:PLEASE(?:\s+DO)?|DO)\s+)?(?:NOT|N'T)}]},
|
|
|
6952 |
|
|
|
6953 |
{languages => [qw /J/],
|
|
|
6954 |
to_eol => ['NB[.]']},
|
|
|
6955 |
|
|
|
6956 |
{languages => [qw /Nickle/],
|
|
|
6957 |
to_eol => ['#'],
|
|
|
6958 |
from_to => [[qw {/* */}]]},
|
|
|
6959 |
|
|
|
6960 |
{languages => [qw /Oberon/],
|
|
|
6961 |
from_to => [[qw /(* *)/]]},
|
|
|
6962 |
|
|
|
6963 |
{languages => [[qw /Pascal Delphi/], [qw /Pascal Free/], [qw /Pascal GPC/]],
|
|
|
6964 |
to_eol => ['//'],
|
|
|
6965 |
from_to => [[qw !{ }!], [qw !(* *)!]]},
|
|
|
6966 |
|
|
|
6967 |
{languages => [[qw /Pascal Workshop/]],
|
|
|
6968 |
id => [qw /"/],
|
|
|
6969 |
from_to => [[qw !{ }!], [qw !(* *)!], [qw !/* */!]]},
|
|
|
6970 |
|
|
|
6971 |
{languages => [qw /PEARL/],
|
|
|
6972 |
to_eol => ['!'],
|
|
|
6973 |
from_to => [[qw {/* */}]]},
|
|
|
6974 |
|
|
|
6975 |
{languages => [qw /PHP/],
|
|
|
6976 |
to_eol => ['#', '//'],
|
|
|
6977 |
from_to => [[qw {/* */}]]},
|
|
|
6978 |
|
|
|
6979 |
{languages => [qw !PL/B!],
|
|
|
6980 |
to_eol => ['[.;]']},
|
|
|
6981 |
|
|
|
6982 |
{languages => [qw !PL/SQL!],
|
|
|
6983 |
to_eol => ['--'],
|
|
|
6984 |
from_to => [[qw {/* */}]]},
|
|
|
6985 |
|
|
|
6986 |
{languages => [qw /Q-BAL/],
|
|
|
6987 |
to_eol => ['`']},
|
|
|
6988 |
|
|
|
6989 |
{languages => [qw /Smalltalk/],
|
|
|
6990 |
id => ['"']},
|
|
|
6991 |
|
|
|
6992 |
{languages => [qw /SQL/],
|
|
|
6993 |
to_eol => ['-{2,}']},
|
|
|
6994 |
|
|
|
6995 |
{languages => [qw /troff/],
|
|
|
6996 |
to_eol => ['\\\"']},
|
|
|
6997 |
|
|
|
6998 |
{languages => [qw /vi/],
|
|
|
6999 |
to_eol => ['"']},
|
|
|
7000 |
|
|
|
7001 |
{languages => [qw /*W/],
|
|
|
7002 |
from_to => [[qw {|| !!}]]},
|
|
|
7003 |
);
|
|
|
7004 |
|
|
|
7005 |
my @plain_or_nested = (
|
|
|
7006 |
[Caml => undef, "(*" => "*)"],
|
|
|
7007 |
[Dylan => "//", "/*" => "*/"],
|
|
|
7008 |
[Haskell => "-{2,}", "{-" => "-}"],
|
|
|
7009 |
[Hugo => "!(?!\\\\)", "!\\" => "\\!"],
|
|
|
7010 |
[SLIDE => "#", "(*" => "*)"],
|
|
|
7011 |
);
|
|
|
7012 |
|
|
|
7013 |
#
|
|
|
7014 |
# Helper subs.
|
|
|
7015 |
#
|
|
|
7016 |
|
|
|
7017 |
sub combine {
|
|
|
7018 |
local $_ = join "|", @_;
|
|
|
7019 |
if (@_ > 1) {
|
|
|
7020 |
s/\(\?k:/(?:/g;
|
|
|
7021 |
$_ = "(?k:$_)";
|
|
|
7022 |
}
|
|
|
7023 |
$_
|
|
|
7024 |
}
|
|
|
7025 |
|
|
|
7026 |
sub to_eol ($) {"(?k:(?k:$_[0])(?k:[^\\n]*)(?k:\\n))"}
|
|
|
7027 |
sub id ($) {"(?k:(?k:$_[0])(?k:[^$_[0]]*)(?k:$_[0]))"} # One char only!
|
|
|
7028 |
sub from_to {
|
|
|
7029 |
local $^W = 1;
|
|
|
7030 |
my ($begin, $end) = @_;
|
|
|
7031 |
|
|
|
7032 |
my $qb = quotemeta $begin;
|
|
|
7033 |
my $qe = quotemeta $end;
|
|
|
7034 |
my $fe = quotemeta substr $end => 0, 1;
|
|
|
7035 |
my $te = quotemeta substr $end => 1;
|
|
|
7036 |
|
|
|
7037 |
"(?k:(?k:$qb)(?k:(?:[^$fe]+|$fe(?!$te))*)(?k:$qe))";
|
|
|
7038 |
}
|
|
|
7039 |
|
|
|
7040 |
|
|
|
7041 |
my $count = 0;
|
|
|
7042 |
sub nested {
|
|
|
7043 |
local $^W = 1;
|
|
|
7044 |
my ($begin, $end) = @_;
|
|
|
7045 |
|
|
|
7046 |
$count ++;
|
|
|
7047 |
my $r = '(??{$Regexp::Common::comment ['. $count . ']})';
|
|
|
7048 |
|
|
|
7049 |
my $qb = quotemeta $begin;
|
|
|
7050 |
my $qe = quotemeta $end;
|
|
|
7051 |
my $fb = quotemeta substr $begin => 0, 1;
|
|
|
7052 |
my $fe = quotemeta substr $end => 0, 1;
|
|
|
7053 |
|
|
|
7054 |
my $tb = quotemeta substr $begin => 1;
|
|
|
7055 |
my $te = quotemeta substr $end => 1;
|
|
|
7056 |
|
|
|
7057 |
use re 'eval';
|
|
|
7058 |
|
|
|
7059 |
my $re;
|
|
|
7060 |
if ($fb eq $fe) {
|
|
|
7061 |
$re = qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
|
|
|
7062 |
}
|
|
|
7063 |
else {
|
|
|
7064 |
local $" = "|";
|
|
|
7065 |
my @clauses = "(?>[^$fb$fe]+)";
|
|
|
7066 |
push @clauses => "$fb(?!$tb)" if length $tb;
|
|
|
7067 |
push @clauses => "$fe(?!$te)" if length $te;
|
|
|
7068 |
push @clauses => $r;
|
|
|
7069 |
$re = qr /(?:$qb(?:@clauses)*$qe)/;
|
|
|
7070 |
}
|
|
|
7071 |
|
|
|
7072 |
$Regexp::Common::comment [$count] = qr/$re/;
|
|
|
7073 |
}
|
|
|
7074 |
|
|
|
7075 |
#
|
|
|
7076 |
# Process data.
|
|
|
7077 |
#
|
|
|
7078 |
|
|
|
7079 |
foreach my $info (@plain_or_nested) {
|
|
|
7080 |
my ($language, $mark, $begin, $end) = @$info;
|
|
|
7081 |
pattern name => [comment => $language],
|
|
|
7082 |
create =>
|
|
|
7083 |
sub {my $re = nested $begin => $end;
|
|
|
7084 |
my $prefix = defined $mark ? $mark . "[^\n]*\n|" : "";
|
|
|
7085 |
exists $_ [1] -> {-keep} ? qr /($prefix$re)/
|
|
|
7086 |
: qr /$prefix$re/
|
|
|
7087 |
},
|
|
|
7088 |
version => 5.006,
|
|
|
7089 |
;
|
|
|
7090 |
}
|
|
|
7091 |
|
|
|
7092 |
|
|
|
7093 |
foreach my $group (@generic) {
|
|
|
7094 |
my $pattern = combine +(map {to_eol $_} @{$group -> {to_eol}}),
|
|
|
7095 |
(map {from_to @$_} @{$group -> {from_to}}),
|
|
|
7096 |
(map {id $_} @{$group -> {id}}),
|
|
|
7097 |
;
|
|
|
7098 |
foreach my $language (@{$group -> {languages}}) {
|
|
|
7099 |
pattern name => [comment => ref $language ? @$language : $language],
|
|
|
7100 |
create => $pattern,
|
|
|
7101 |
;
|
|
|
7102 |
}
|
|
|
7103 |
}
|
|
|
7104 |
|
|
|
7105 |
|
|
|
7106 |
|
|
|
7107 |
#
|
|
|
7108 |
# Other languages.
|
|
|
7109 |
#
|
|
|
7110 |
|
|
|
7111 |
# http://www.pascal-central.com/docs/iso10206.txt
|
|
|
7112 |
pattern name => [qw /comment Pascal/],
|
|
|
7113 |
create => '(?k:' . '(?k:[{]|[(][*])'
|
|
|
7114 |
. '(?k:[^}*]*(?:[*][^)][^}*]*)*)'
|
|
|
7115 |
. '(?k:[}]|[*][)])'
|
|
|
7116 |
. ')'
|
|
|
7117 |
;
|
|
|
7118 |
|
|
|
7119 |
# http://www.templetons.com/brad/alice/language/
|
|
|
7120 |
pattern name => [qw /comment Pascal Alice/],
|
|
|
7121 |
create => '(?k:(?k:[{])(?k:[^}\n]*)(?k:[}]))'
|
|
|
7122 |
;
|
|
|
7123 |
|
|
|
7124 |
|
|
|
7125 |
# http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt
|
|
|
7126 |
pattern name => [qw (comment), 'Algol 68'],
|
|
|
7127 |
create => q {(?k:(?:#[^#]*#)|} .
|
|
|
7128 |
q {(?:\bco\b(?:[^c]+|\Bc|\bc(?!o\b))*\bco\b)|} .
|
|
|
7129 |
q {(?:\bcomment\b(?:[^c]+|\Bc|\bc(?!omment\b))*\bcomment\b))}
|
|
|
7130 |
;
|
|
|
7131 |
|
|
|
7132 |
|
|
|
7133 |
# See rules 91 and 92 of ISO 8879 (SGML).
|
|
|
7134 |
# Charles F. Goldfarb: "The SGML Handbook".
|
|
|
7135 |
# Oxford: Oxford University Press. 1990. ISBN 0-19-853737-9.
|
|
|
7136 |
# Ch. 10.3, pp 390.
|
|
|
7137 |
pattern name => [qw (comment HTML)],
|
|
|
7138 |
create => q {(?k:(?k:<!)(?k:(?:--(?k:[^-]*(?:-[^-]+)*)--\s*)*)(?k:>))},
|
|
|
7139 |
;
|
|
|
7140 |
|
|
|
7141 |
|
|
|
7142 |
pattern name => [qw /comment SQL MySQL/],
|
|
|
7143 |
create => q {(?k:(?:#|-- )[^\n]*\n|} .
|
|
|
7144 |
q {/\*(?:(?>[^*;"']+)|"[^"]*"|'[^']*'|\*(?!/))*(?:;|\*/))},
|
|
|
7145 |
;
|
|
|
7146 |
|
|
|
7147 |
# Anything that isn't <>[]+-.,
|
|
|
7148 |
# http://home.wxs.nl/~faase009/Ha_BF.html
|
|
|
7149 |
pattern name => [qw /comment Brainfuck/],
|
|
|
7150 |
create => '(?k:[^<>\[\]+\-.,]+)'
|
|
|
7151 |
;
|
|
|
7152 |
|
|
|
7153 |
# Squeak is a variant of Smalltalk-80.
|
|
|
7154 |
# http://www.squeak.
|
|
|
7155 |
# http://mucow.com/squeak-qref.html
|
|
|
7156 |
pattern name => [qw /comment Squeak/],
|
|
|
7157 |
create => '(?k:(?k:")(?k:[^"]*(?:""[^"]*)*)(?k:"))'
|
|
|
7158 |
;
|
|
|
7159 |
|
|
|
7160 |
#
|
|
|
7161 |
# Scores of less than 5 or above 17....
|
|
|
7162 |
# http://www.cliff.biffle.org/esoterica/beatnik.html
|
|
|
7163 |
@Regexp::Common::comment::scores = (1, 3, 3, 2, 1, 4, 2, 4, 1, 8,
|
|
|
7164 |
5, 1, 3, 1, 1, 3, 10, 1, 1, 1,
|
|
|
7165 |
1, 4, 4, 8, 4, 10);
|
|
|
7166 |
pattern name => [qw /comment Beatnik/],
|
|
|
7167 |
create => sub {
|
|
|
7168 |
use re 'eval';
|
|
|
7169 |
my ($s, $x);
|
|
|
7170 |
my $re = qr {\b([A-Za-z]+)\b
|
|
|
7171 |
(?(?{($s, $x) = (0, lc $^N);
|
|
|
7172 |
$s += $Regexp::Common::comment::scores
|
|
|
7173 |
[ord (chop $x) - ord ('a')] while length $x;
|
|
|
7174 |
$s >= 5 && $s < 18})XXX|)}x;
|
|
|
7175 |
$re;
|
|
|
7176 |
},
|
|
|
7177 |
version => 5.008,
|
|
|
7178 |
;
|
|
|
7179 |
|
|
|
7180 |
|
|
|
7181 |
# http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/
|
|
|
7182 |
# (Goto table of contents/3.3 Source Form)
|
|
|
7183 |
# Fortran, in fixed format. Comments start with a C, c or * in the first
|
|
|
7184 |
# column, or a ! anywhere, but the sixth column. Then end with a newline.
|
|
|
7185 |
pattern name => [qw /comment Fortran fixed/],
|
|
|
7186 |
create => '(?k:(?k:(?:^[Cc*]|(?<!^.....)!))(?k:[^\n]*)(?k:\n))'
|
|
|
7187 |
;
|
|
|
7188 |
|
|
|
7189 |
|
|
|
7190 |
# http://www.csis.ul.ie/cobol/Course/COBOLIntro.htm
|
|
|
7191 |
# Traditionally, comments in COBOL were indicated with an asteriks in
|
|
|
7192 |
# the seventh column. Modern compilers may be more lenient.
|
|
|
7193 |
pattern name => [qw /comment COBOL/],
|
|
|
7194 |
create => '(?<=^......)(?k:(?k:[*])(?k:[^\n]*)(?k:\n))',
|
|
|
7195 |
version => '5.008',
|
|
|
7196 |
;
|
|
|
7197 |
|
|
|
7198 |
1;
|
|
|
7199 |
#
|
|
|
7200 |
# Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
|
|
|
7201 |
# This module is free software. It may be used, redistributed
|
|
|
7202 |
# and/or modified under the terms of the Perl Artistic License
|
|
|
7203 |
# (see http://www.perl.com/perl/misc/Artistic.html)
|
|
|
7204 |
EOC
|
|
|
7205 |
# 2}}}
|
|
|
7206 |
$Regexp_Common_Contents{'Common/balanced'} = <<'EOB'; # {{{2
|
|
|
7207 |
package Regexp::Common::balanced; {
|
|
|
7208 |
|
|
|
7209 |
use strict;
|
|
|
7210 |
local $^W = 1;
|
|
|
7211 |
|
|
|
7212 |
use vars qw /$VERSION/;
|
|
|
7213 |
($VERSION) = q $Revision: 2.101 $ =~ /[\d.]+/g;
|
|
|
7214 |
|
|
|
7215 |
use Regexp::Common qw /pattern clean no_defaults/;
|
|
|
7216 |
|
|
|
7217 |
my %closer = ( '{'=>'}', '('=>')', '['=>']', '<'=>'>' );
|
|
|
7218 |
my $count = -1;
|
|
|
7219 |
my %cache;
|
|
|
7220 |
|
|
|
7221 |
sub nested {
|
|
|
7222 |
local $^W = 1;
|
|
|
7223 |
my ($start, $finish) = @_;
|
|
|
7224 |
|
|
|
7225 |
return $Regexp::Common::balanced [$cache {$start} {$finish}]
|
|
|
7226 |
if exists $cache {$start} {$finish};
|
|
|
7227 |
|
|
|
7228 |
$count ++;
|
|
|
7229 |
my $r = '(??{$Regexp::Common::balanced ['. $count . ']})';
|
|
|
7230 |
|
|
|
7231 |
my @starts = map {s/\\(.)/$1/g; $_} grep {length}
|
|
|
7232 |
$start =~ /([^|\\]+|\\.)+/gs;
|
|
|
7233 |
my @finishes = map {s/\\(.)/$1/g; $_} grep {length}
|
|
|
7234 |
$finish =~ /([^|\\]+|\\.)+/gs;
|
|
|
7235 |
|
|
|
7236 |
push @finishes => ($finishes [-1]) x (@starts - @finishes);
|
|
|
7237 |
|
|
|
7238 |
my @re;
|
|
|
7239 |
local $" = "|";
|
|
|
7240 |
foreach my $begin (@starts) {
|
|
|
7241 |
my $end = shift @finishes;
|
|
|
7242 |
|
|
|
7243 |
my $qb = quotemeta $begin;
|
|
|
7244 |
my $qe = quotemeta $end;
|
|
|
7245 |
my $fb = quotemeta substr $begin => 0, 1;
|
|
|
7246 |
my $fe = quotemeta substr $end => 0, 1;
|
|
|
7247 |
|
|
|
7248 |
my $tb = quotemeta substr $begin => 1;
|
|
|
7249 |
my $te = quotemeta substr $end => 1;
|
|
|
7250 |
|
|
|
7251 |
use re 'eval';
|
|
|
7252 |
|
|
|
7253 |
my $add;
|
|
|
7254 |
if ($fb eq $fe) {
|
|
|
7255 |
push @re =>
|
|
|
7256 |
qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
|
|
|
7257 |
}
|
|
|
7258 |
else {
|
|
|
7259 |
my @clauses = "(?>[^$fb$fe]+)";
|
|
|
7260 |
push @clauses => "$fb(?!$tb)" if length $tb;
|
|
|
7261 |
push @clauses => "$fe(?!$te)" if length $te;
|
|
|
7262 |
push @clauses => $r;
|
|
|
7263 |
push @re => qr /(?:$qb(?:@clauses)*$qe)/;
|
|
|
7264 |
}
|
|
|
7265 |
}
|
|
|
7266 |
|
|
|
7267 |
$cache {$start} {$finish} = $count;
|
|
|
7268 |
$Regexp::Common::balanced [$count] = qr/@re/;
|
|
|
7269 |
}
|
|
|
7270 |
|
|
|
7271 |
|
|
|
7272 |
pattern name => [qw /balanced -parens=() -begin= -end=/],
|
|
|
7273 |
create => sub {
|
|
|
7274 |
my $flag = $_[1];
|
|
|
7275 |
unless (defined $flag -> {-begin} && length $flag -> {-begin} &&
|
|
|
7276 |
defined $flag -> {-end} && length $flag -> {-end}) {
|
|
|
7277 |
my @open = grep {index ($flag->{-parens}, $_) >= 0}
|
|
|
7278 |
('[','(','{','<');
|
|
|
7279 |
my @close = map {$closer {$_}} @open;
|
|
|
7280 |
$flag -> {-begin} = join "|" => @open;
|
|
|
7281 |
$flag -> {-end} = join "|" => @close;
|
|
|
7282 |
}
|
|
|
7283 |
my $pat = nested @$flag {qw /-begin -end/};
|
|
|
7284 |
return exists $flag -> {-keep} ? qr /($pat)/ : $pat;
|
|
|
7285 |
},
|
|
|
7286 |
version => 5.006,
|
|
|
7287 |
;
|
|
|
7288 |
|
|
|
7289 |
}
|
|
|
7290 |
|
|
|
7291 |
1;
|
|
|
7292 |
#
|
|
|
7293 |
# Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
|
|
|
7294 |
# This module is free software. It may be used, redistributed
|
|
|
7295 |
# and/or modified under the terms of the Perl Artistic License
|
|
|
7296 |
# (see http://www.perl.com/perl/misc/Artistic.html)
|
|
|
7297 |
EOB
|
|
|
7298 |
# 2}}}
|
|
|
7299 |
$Regexp_Common_Contents{'Common/delimited'} = <<'EOD'; # {{{2
|
|
|
7300 |
# $Id: delimited.pm,v 2.104 2005/03/16 00:22:45 abigail Exp $
|
|
|
7301 |
|
|
|
7302 |
package Regexp::Common::delimited;
|
|
|
7303 |
|
|
|
7304 |
use strict;
|
|
|
7305 |
local $^W = 1;
|
|
|
7306 |
|
|
|
7307 |
use Regexp::Common qw /pattern clean no_defaults/;
|
|
|
7308 |
use vars qw /$VERSION/;
|
|
|
7309 |
|
|
|
7310 |
($VERSION) = q $Revision: 2.104 $ =~ /[\d.]+/g;
|
|
|
7311 |
|
|
|
7312 |
sub gen_delimited {
|
|
|
7313 |
|
|
|
7314 |
my ($dels, $escs) = @_;
|
|
|
7315 |
# return '(?:\S*)' unless $dels =~ /\S/;
|
|
|
7316 |
if (length $escs) {
|
|
|
7317 |
$escs .= substr ($escs, -1) x (length ($dels) - length ($escs));
|
|
|
7318 |
}
|
|
|
7319 |
my @pat = ();
|
|
|
7320 |
my $i;
|
|
|
7321 |
for ($i=0; $i < length $dels; $i++) {
|
|
|
7322 |
my $del = quotemeta substr ($dels, $i, 1);
|
|
|
7323 |
my $esc = length($escs) ? quotemeta substr ($escs, $i, 1) : "";
|
|
|
7324 |
if ($del eq $esc) {
|
|
|
7325 |
push @pat,
|
|
|
7326 |
"(?k:$del)(?k:[^$del]*(?:(?:$del$del)[^$del]*)*)(?k:$del)";
|
|
|
7327 |
}
|
|
|
7328 |
elsif (length $esc) {
|
|
|
7329 |
push @pat,
|
|
|
7330 |
"(?k:$del)(?k:[^$esc$del]*(?:$esc.[^$esc$del]*)*)(?k:$del)";
|
|
|
7331 |
}
|
|
|
7332 |
else {
|
|
|
7333 |
push @pat, "(?k:$del)(?k:[^$del]*)(?k:$del)";
|
|
|
7334 |
}
|
|
|
7335 |
}
|
|
|
7336 |
my $pat = join '|', @pat;
|
|
|
7337 |
return "(?k:$pat)";
|
|
|
7338 |
}
|
|
|
7339 |
|
|
|
7340 |
sub _croak {
|
|
|
7341 |
require Carp;
|
|
|
7342 |
goto &Carp::croak;
|
|
|
7343 |
}
|
|
|
7344 |
|
|
|
7345 |
pattern name => [qw( delimited -delim= -esc=\\ )],
|
|
|
7346 |
create => sub {my $flags = $_[1];
|
|
|
7347 |
_croak 'Must specify delimiter in $RE{delimited}'
|
|
|
7348 |
unless length $flags->{-delim};
|
|
|
7349 |
return gen_delimited (@{$flags}{-delim, -esc});
|
|
|
7350 |
},
|
|
|
7351 |
;
|
|
|
7352 |
|
|
|
7353 |
pattern name => [qw( quoted -esc=\\ )],
|
|
|
7354 |
create => sub {my $flags = $_[1];
|
|
|
7355 |
return gen_delimited (q{"'`}, $flags -> {-esc});
|
|
|
7356 |
},
|
|
|
7357 |
;
|
|
|
7358 |
|
|
|
7359 |
|
|
|
7360 |
1;
|
|
|
7361 |
#
|
|
|
7362 |
# Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
|
|
|
7363 |
# This module is free software. It may be used, redistributed
|
|
|
7364 |
# and/or modified under the terms of the Perl Artistic License
|
|
|
7365 |
# (see http://www.perl.com/perl/misc/Artistic.html)
|
|
|
7366 |
EOD
|
|
|
7367 |
# 2}}}
|
|
|
7368 |
my $problems = 0;
|
|
|
7369 |
$HAVE_Rexexp_Common = 0;
|
|
|
7370 |
my $dir = "";
|
|
|
7371 |
if ($opt_sdir) {
|
|
|
7372 |
++$TEMP_OFF;
|
|
|
7373 |
$dir = "$opt_sdir/$TEMP_OFF";
|
|
|
7374 |
File::Path::rmtree($dir) if is_dir($dir);
|
|
|
7375 |
File::Path::mkpath($dir) unless is_dir($dir);
|
|
|
7376 |
} else {
|
|
|
7377 |
# let File::Temp create a suitable temporary directory
|
|
|
7378 |
$dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
|
|
|
7379 |
$TEMP_INST{ $dir } = "Regexp::Common";
|
|
|
7380 |
}
|
|
|
7381 |
print "Using temp dir [$dir] to install Regexp::Common\n" if $opt_v;
|
|
|
7382 |
my $Regexp_dir = "$dir/Regexp";
|
|
|
7383 |
my $Regexp_Common_dir = "$dir/Regexp/Common";
|
|
|
7384 |
mkdir $Regexp_dir ;
|
|
|
7385 |
mkdir $Regexp_Common_dir;
|
|
|
7386 |
|
|
|
7387 |
foreach my $module_file (keys %Regexp_Common_Contents) {
|
|
|
7388 |
my $OUT = new IO::File "$dir/Regexp/${module_file}.pm", "w";
|
|
|
7389 |
if (defined $OUT) {
|
|
|
7390 |
print $OUT $Regexp_Common_Contents{$module_file};
|
|
|
7391 |
$OUT->close;
|
|
|
7392 |
} else {
|
|
|
7393 |
warn "Failed to install Regexp::${module_file}.pm\n";
|
|
|
7394 |
$problems = 1;
|
|
|
7395 |
}
|
|
|
7396 |
}
|
|
|
7397 |
|
|
|
7398 |
push @INC, $dir;
|
|
|
7399 |
eval "use Regexp::Common qw /comment RE_comment_HTML balanced/";
|
|
|
7400 |
$HAVE_Rexexp_Common = 1 unless $problems;
|
|
|
7401 |
} # 1}}}
|
|
|
7402 |
sub Install_Algorithm_Diff { # {{{1
|
|
|
7403 |
# Installs Tye McQueen's Algorithm::Diff module, v1.1902, into a
|
|
|
7404 |
# temporary directory for the duration of this run.
|
|
|
7405 |
|
|
|
7406 |
my $Algorithm_Diff_Contents = <<'EOAlgDiff'; # {{{2
|
|
|
7407 |
package Algorithm::Diff;
|
|
|
7408 |
# Skip to first "=head" line for documentation.
|
|
|
7409 |
use strict;
|
|
|
7410 |
|
|
|
7411 |
use integer; # see below in _replaceNextLargerWith() for mod to make
|
|
|
7412 |
# if you don't use this
|
|
|
7413 |
use vars qw( $VERSION @EXPORT_OK );
|
|
|
7414 |
$VERSION = 1.19_02;
|
|
|
7415 |
# ^ ^^ ^^-- Incremented at will
|
|
|
7416 |
# | \+----- Incremented for non-trivial changes to features
|
|
|
7417 |
# \-------- Incremented for fundamental changes
|
|
|
7418 |
require Exporter;
|
|
|
7419 |
*import = \&Exporter::import;
|
|
|
7420 |
@EXPORT_OK = qw(
|
|
|
7421 |
prepare LCS LCSidx LCS_length
|
|
|
7422 |
diff sdiff compact_diff
|
|
|
7423 |
traverse_sequences traverse_balanced
|
|
|
7424 |
);
|
|
|
7425 |
|
|
|
7426 |
# McIlroy-Hunt diff algorithm
|
|
|
7427 |
# Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
|
|
|
7428 |
# by Ned Konz, perl@bike-nomad.com
|
|
|
7429 |
# Updates by Tye McQueen, http://perlmonks.org/?node=tye
|
|
|
7430 |
|
|
|
7431 |
# Create a hash that maps each element of $aCollection to the set of
|
|
|
7432 |
# positions it occupies in $aCollection, restricted to the elements
|
|
|
7433 |
# within the range of indexes specified by $start and $end.
|
|
|
7434 |
# The fourth parameter is a subroutine reference that will be called to
|
|
|
7435 |
# generate a string to use as a key.
|
|
|
7436 |
# Additional parameters, if any, will be passed to this subroutine.
|
|
|
7437 |
#
|
|
|
7438 |
# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen );
|
|
|
7439 |
|
|
|
7440 |
sub _withPositionsOfInInterval
|
|
|
7441 |
{
|
|
|
7442 |
my $aCollection = shift; # array ref
|
|
|
7443 |
my $start = shift;
|
|
|
7444 |
my $end = shift;
|
|
|
7445 |
my $keyGen = shift;
|
|
|
7446 |
my %d;
|
|
|
7447 |
my $index;
|
|
|
7448 |
for ( $index = $start ; $index <= $end ; $index++ )
|
|
|
7449 |
{
|
|
|
7450 |
my $element = $aCollection->[$index];
|
|
|
7451 |
my $key = &$keyGen( $element, @_ );
|
|
|
7452 |
if ( exists( $d{$key} ) )
|
|
|
7453 |
{
|
|
|
7454 |
unshift ( @{ $d{$key} }, $index );
|
|
|
7455 |
}
|
|
|
7456 |
else
|
|
|
7457 |
{
|
|
|
7458 |
$d{$key} = [$index];
|
|
|
7459 |
}
|
|
|
7460 |
}
|
|
|
7461 |
return wantarray ? %d : \%d;
|
|
|
7462 |
}
|
|
|
7463 |
|
|
|
7464 |
# Find the place at which aValue would normally be inserted into the
|
|
|
7465 |
# array. If that place is already occupied by aValue, do nothing, and
|
|
|
7466 |
# return undef. If the place does not exist (i.e., it is off the end of
|
|
|
7467 |
# the array), add it to the end, otherwise replace the element at that
|
|
|
7468 |
# point with aValue. It is assumed that the array's values are numeric.
|
|
|
7469 |
# This is where the bulk (75%) of the time is spent in this module, so
|
|
|
7470 |
# try to make it fast!
|
|
|
7471 |
|
|
|
7472 |
sub _replaceNextLargerWith
|
|
|
7473 |
{
|
|
|
7474 |
my ( $array, $aValue, $high ) = @_;
|
|
|
7475 |
$high ||= $#$array;
|
|
|
7476 |
|
|
|
7477 |
# off the end?
|
|
|
7478 |
if ( $high == -1 || $aValue > $array->[-1] )
|
|
|
7479 |
{
|
|
|
7480 |
push ( @$array, $aValue );
|
|
|
7481 |
return $high + 1;
|
|
|
7482 |
}
|
|
|
7483 |
|
|
|
7484 |
# binary search for insertion point...
|
|
|
7485 |
my $low = 0;
|
|
|
7486 |
my $index;
|
|
|
7487 |
my $found;
|
|
|
7488 |
while ( $low <= $high )
|
|
|
7489 |
{
|
|
|
7490 |
$index = ( $high + $low ) / 2;
|
|
|
7491 |
|
|
|
7492 |
# $index = int(( $high + $low ) / 2); # without 'use integer'
|
|
|
7493 |
$found = $array->[$index];
|
|
|
7494 |
|
|
|
7495 |
if ( $aValue == $found )
|
|
|
7496 |
{
|
|
|
7497 |
return undef;
|
|
|
7498 |
}
|
|
|
7499 |
elsif ( $aValue > $found )
|
|
|
7500 |
{
|
|
|
7501 |
$low = $index + 1;
|
|
|
7502 |
}
|
|
|
7503 |
else
|
|
|
7504 |
{
|
|
|
7505 |
$high = $index - 1;
|
|
|
7506 |
}
|
|
|
7507 |
}
|
|
|
7508 |
|
|
|
7509 |
# now insertion point is in $low.
|
|
|
7510 |
$array->[$low] = $aValue; # overwrite next larger
|
|
|
7511 |
return $low;
|
|
|
7512 |
}
|
|
|
7513 |
|
|
|
7514 |
# This method computes the longest common subsequence in $a and $b.
|
|
|
7515 |
|
|
|
7516 |
# Result is array or ref, whose contents is such that
|
|
|
7517 |
# $a->[ $i ] == $b->[ $result[ $i ] ]
|
|
|
7518 |
# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.
|
|
|
7519 |
|
|
|
7520 |
# An additional argument may be passed; this is a hash or key generating
|
|
|
7521 |
# function that should return a string that uniquely identifies the given
|
|
|
7522 |
# element. It should be the case that if the key is the same, the elements
|
|
|
7523 |
# will compare the same. If this parameter is undef or missing, the key
|
|
|
7524 |
# will be the element as a string.
|
|
|
7525 |
|
|
|
7526 |
# By default, comparisons will use "eq" and elements will be turned into keys
|
|
|
7527 |
# using the default stringizing operator '""'.
|
|
|
7528 |
|
|
|
7529 |
# Additional parameters, if any, will be passed to the key generation
|
|
|
7530 |
# routine.
|
|
|
7531 |
|
|
|
7532 |
sub _longestCommonSubsequence
|
|
|
7533 |
{
|
|
|
7534 |
my $a = shift; # array ref or hash ref
|
|
|
7535 |
my $b = shift; # array ref or hash ref
|
|
|
7536 |
my $counting = shift; # scalar
|
|
|
7537 |
my $keyGen = shift; # code ref
|
|
|
7538 |
my $compare; # code ref
|
|
|
7539 |
|
|
|
7540 |
if ( ref($a) eq 'HASH' )
|
|
|
7541 |
{ # prepared hash must be in $b
|
|
|
7542 |
my $tmp = $b;
|
|
|
7543 |
$b = $a;
|
|
|
7544 |
$a = $tmp;
|
|
|
7545 |
}
|
|
|
7546 |
|
|
|
7547 |
# Check for bogus (non-ref) argument values
|
|
|
7548 |
if ( !ref($a) || !ref($b) )
|
|
|
7549 |
{
|
|
|
7550 |
my @callerInfo = caller(1);
|
|
|
7551 |
die 'error: must pass array or hash references to ' . $callerInfo[3];
|
|
|
7552 |
}
|
|
|
7553 |
|
|
|
7554 |
# set up code refs
|
|
|
7555 |
# Note that these are optimized.
|
|
|
7556 |
if ( !defined($keyGen) ) # optimize for strings
|
|
|
7557 |
{
|
|
|
7558 |
$keyGen = sub { $_[0] };
|
|
|
7559 |
$compare = sub { my ( $a, $b ) = @_; $a eq $b };
|
|
|
7560 |
}
|
|
|
7561 |
else
|
|
|
7562 |
{
|
|
|
7563 |
$compare = sub {
|
|
|
7564 |
my $a = shift;
|
|
|
7565 |
my $b = shift;
|
|
|
7566 |
&$keyGen( $a, @_ ) eq &$keyGen( $b, @_ );
|
|
|
7567 |
};
|
|
|
7568 |
}
|
|
|
7569 |
|
|
|
7570 |
my ( $aStart, $aFinish, $matchVector ) = ( 0, $#$a, [] );
|
|
|
7571 |
my ( $prunedCount, $bMatches ) = ( 0, {} );
|
|
|
7572 |
|
|
|
7573 |
if ( ref($b) eq 'HASH' ) # was $bMatches prepared for us?
|
|
|
7574 |
{
|
|
|
7575 |
$bMatches = $b;
|
|
|
7576 |
}
|
|
|
7577 |
else
|
|
|
7578 |
{
|
|
|
7579 |
my ( $bStart, $bFinish ) = ( 0, $#$b );
|
|
|
7580 |
|
|
|
7581 |
# First we prune off any common elements at the beginning
|
|
|
7582 |
while ( $aStart <= $aFinish
|
|
|
7583 |
and $bStart <= $bFinish
|
|
|
7584 |
and &$compare( $a->[$aStart], $b->[$bStart], @_ ) )
|
|
|
7585 |
{
|
|
|
7586 |
$matchVector->[ $aStart++ ] = $bStart++;
|
|
|
7587 |
$prunedCount++;
|
|
|
7588 |
}
|
|
|
7589 |
|
|
|
7590 |
# now the end
|
|
|
7591 |
while ( $aStart <= $aFinish
|
|
|
7592 |
and $bStart <= $bFinish
|
|
|
7593 |
and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) )
|
|
|
7594 |
{
|
|
|
7595 |
$matchVector->[ $aFinish-- ] = $bFinish--;
|
|
|
7596 |
$prunedCount++;
|
|
|
7597 |
}
|
|
|
7598 |
|
|
|
7599 |
# Now compute the equivalence classes of positions of elements
|
|
|
7600 |
$bMatches =
|
|
|
7601 |
_withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ );
|
|
|
7602 |
}
|
|
|
7603 |
my $thresh = [];
|
|
|
7604 |
my $links = [];
|
|
|
7605 |
|
|
|
7606 |
my ( $i, $ai, $j, $k );
|
|
|
7607 |
for ( $i = $aStart ; $i <= $aFinish ; $i++ )
|
|
|
7608 |
{
|
|
|
7609 |
$ai = &$keyGen( $a->[$i], @_ );
|
|
|
7610 |
if ( exists( $bMatches->{$ai} ) )
|
|
|
7611 |
{
|
|
|
7612 |
$k = 0;
|
|
|
7613 |
for $j ( @{ $bMatches->{$ai} } )
|
|
|
7614 |
{
|
|
|
7615 |
|
|
|
7616 |
# optimization: most of the time this will be true
|
|
|
7617 |
if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j )
|
|
|
7618 |
{
|
|
|
7619 |
$thresh->[$k] = $j;
|
|
|
7620 |
}
|
|
|
7621 |
else
|
|
|
7622 |
{
|
|
|
7623 |
$k = _replaceNextLargerWith( $thresh, $j, $k );
|
|
|
7624 |
}
|
|
|
7625 |
|
|
|
7626 |
# oddly, it's faster to always test this (CPU cache?).
|
|
|
7627 |
if ( defined($k) )
|
|
|
7628 |
{
|
|
|
7629 |
$links->[$k] =
|
|
|
7630 |
[ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ];
|
|
|
7631 |
}
|
|
|
7632 |
}
|
|
|
7633 |
}
|
|
|
7634 |
}
|
|
|
7635 |
|
|
|
7636 |
if (@$thresh)
|
|
|
7637 |
{
|
|
|
7638 |
return $prunedCount + @$thresh if $counting;
|
|
|
7639 |
for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] )
|
|
|
7640 |
{
|
|
|
7641 |
$matchVector->[ $link->[1] ] = $link->[2];
|
|
|
7642 |
}
|
|
|
7643 |
}
|
|
|
7644 |
elsif ($counting)
|
|
|
7645 |
{
|
|
|
7646 |
return $prunedCount;
|
|
|
7647 |
}
|
|
|
7648 |
|
|
|
7649 |
return wantarray ? @$matchVector : $matchVector;
|
|
|
7650 |
}
|
|
|
7651 |
|
|
|
7652 |
sub traverse_sequences
|
|
|
7653 |
{
|
|
|
7654 |
my $a = shift; # array ref
|
|
|
7655 |
my $b = shift; # array ref
|
|
|
7656 |
my $callbacks = shift || {};
|
|
|
7657 |
my $keyGen = shift;
|
|
|
7658 |
my $matchCallback = $callbacks->{'MATCH'} || sub { };
|
|
|
7659 |
my $discardACallback = $callbacks->{'DISCARD_A'} || sub { };
|
|
|
7660 |
my $finishedACallback = $callbacks->{'A_FINISHED'};
|
|
|
7661 |
my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { };
|
|
|
7662 |
my $finishedBCallback = $callbacks->{'B_FINISHED'};
|
|
|
7663 |
my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
|
|
|
7664 |
|
|
|
7665 |
# Process all the lines in @$matchVector
|
|
|
7666 |
my $lastA = $#$a;
|
|
|
7667 |
my $lastB = $#$b;
|
|
|
7668 |
my $bi = 0;
|
|
|
7669 |
my $ai;
|
|
|
7670 |
|
|
|
7671 |
for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ )
|
|
|
7672 |
{
|
|
|
7673 |
my $bLine = $matchVector->[$ai];
|
|
|
7674 |
if ( defined($bLine) ) # matched
|
|
|
7675 |
{
|
|
|
7676 |
&$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine;
|
|
|
7677 |
&$matchCallback( $ai, $bi++, @_ );
|
|
|
7678 |
}
|
|
|
7679 |
else
|
|
|
7680 |
{
|
|
|
7681 |
&$discardACallback( $ai, $bi, @_ );
|
|
|
7682 |
}
|
|
|
7683 |
}
|
|
|
7684 |
|
|
|
7685 |
# The last entry (if any) processed was a match.
|
|
|
7686 |
# $ai and $bi point just past the last matching lines in their sequences.
|
|
|
7687 |
|
|
|
7688 |
while ( $ai <= $lastA or $bi <= $lastB )
|
|
|
7689 |
{
|
|
|
7690 |
|
|
|
7691 |
# last A?
|
|
|
7692 |
if ( $ai == $lastA + 1 and $bi <= $lastB )
|
|
|
7693 |
{
|
|
|
7694 |
if ( defined($finishedACallback) )
|
|
|
7695 |
{
|
|
|
7696 |
&$finishedACallback( $lastA, @_ );
|
|
|
7697 |
$finishedACallback = undef;
|
|
|
7698 |
}
|
|
|
7699 |
else
|
|
|
7700 |
{
|
|
|
7701 |
&$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB;
|
|
|
7702 |
}
|
|
|
7703 |
}
|
|
|
7704 |
|
|
|
7705 |
# last B?
|
|
|
7706 |
if ( $bi == $lastB + 1 and $ai <= $lastA )
|
|
|
7707 |
{
|
|
|
7708 |
if ( defined($finishedBCallback) )
|
|
|
7709 |
{
|
|
|
7710 |
&$finishedBCallback( $lastB, @_ );
|
|
|
7711 |
$finishedBCallback = undef;
|
|
|
7712 |
}
|
|
|
7713 |
else
|
|
|
7714 |
{
|
|
|
7715 |
&$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA;
|
|
|
7716 |
}
|
|
|
7717 |
}
|
|
|
7718 |
|
|
|
7719 |
&$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA;
|
|
|
7720 |
&$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB;
|
|
|
7721 |
}
|
|
|
7722 |
|
|
|
7723 |
return 1;
|
|
|
7724 |
}
|
|
|
7725 |
|
|
|
7726 |
sub traverse_balanced
|
|
|
7727 |
{
|
|
|
7728 |
my $a = shift; # array ref
|
|
|
7729 |
my $b = shift; # array ref
|
|
|
7730 |
my $callbacks = shift || {};
|
|
|
7731 |
my $keyGen = shift;
|
|
|
7732 |
my $matchCallback = $callbacks->{'MATCH'} || sub { };
|
|
|
7733 |
my $discardACallback = $callbacks->{'DISCARD_A'} || sub { };
|
|
|
7734 |
my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { };
|
|
|
7735 |
my $changeCallback = $callbacks->{'CHANGE'};
|
|
|
7736 |
my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
|
|
|
7737 |
|
|
|
7738 |
# Process all the lines in match vector
|
|
|
7739 |
my $lastA = $#$a;
|
|
|
7740 |
my $lastB = $#$b;
|
|
|
7741 |
my $bi = 0;
|
|
|
7742 |
my $ai = 0;
|
|
|
7743 |
my $ma = -1;
|
|
|
7744 |
my $mb;
|
|
|
7745 |
|
|
|
7746 |
while (1)
|
|
|
7747 |
{
|
|
|
7748 |
|
|
|
7749 |
# Find next match indices $ma and $mb
|
|
|
7750 |
do {
|
|
|
7751 |
$ma++;
|
|
|
7752 |
} while(
|
|
|
7753 |
$ma <= $#$matchVector
|
|
|
7754 |
&& !defined $matchVector->[$ma]
|
|
|
7755 |
);
|
|
|
7756 |
|
|
|
7757 |
last if $ma > $#$matchVector; # end of matchVector?
|
|
|
7758 |
$mb = $matchVector->[$ma];
|
|
|
7759 |
|
|
|
7760 |
# Proceed with discard a/b or change events until
|
|
|
7761 |
# next match
|
|
|
7762 |
while ( $ai < $ma || $bi < $mb )
|
|
|
7763 |
{
|
|
|
7764 |
|
|
|
7765 |
if ( $ai < $ma && $bi < $mb )
|
|
|
7766 |
{
|
|
|
7767 |
|
|
|
7768 |
# Change
|
|
|
7769 |
if ( defined $changeCallback )
|
|
|
7770 |
{
|
|
|
7771 |
&$changeCallback( $ai++, $bi++, @_ );
|
|
|
7772 |
}
|
|
|
7773 |
else
|
|
|
7774 |
{
|
|
|
7775 |
&$discardACallback( $ai++, $bi, @_ );
|
|
|
7776 |
&$discardBCallback( $ai, $bi++, @_ );
|
|
|
7777 |
}
|
|
|
7778 |
}
|
|
|
7779 |
elsif ( $ai < $ma )
|
|
|
7780 |
{
|
|
|
7781 |
&$discardACallback( $ai++, $bi, @_ );
|
|
|
7782 |
}
|
|
|
7783 |
else
|
|
|
7784 |
{
|
|
|
7785 |
|
|
|
7786 |
# $bi < $mb
|
|
|
7787 |
&$discardBCallback( $ai, $bi++, @_ );
|
|
|
7788 |
}
|
|
|
7789 |
}
|
|
|
7790 |
|
|
|
7791 |
# Match
|
|
|
7792 |
&$matchCallback( $ai++, $bi++, @_ );
|
|
|
7793 |
}
|
|
|
7794 |
|
|
|
7795 |
while ( $ai <= $lastA || $bi <= $lastB )
|
|
|
7796 |
{
|
|
|
7797 |
if ( $ai <= $lastA && $bi <= $lastB )
|
|
|
7798 |
{
|
|
|
7799 |
|
|
|
7800 |
# Change
|
|
|
7801 |
if ( defined $changeCallback )
|
|
|
7802 |
{
|
|
|
7803 |
&$changeCallback( $ai++, $bi++, @_ );
|
|
|
7804 |
}
|
|
|
7805 |
else
|
|
|
7806 |
{
|
|
|
7807 |
&$discardACallback( $ai++, $bi, @_ );
|
|
|
7808 |
&$discardBCallback( $ai, $bi++, @_ );
|
|
|
7809 |
}
|
|
|
7810 |
}
|
|
|
7811 |
elsif ( $ai <= $lastA )
|
|
|
7812 |
{
|
|
|
7813 |
&$discardACallback( $ai++, $bi, @_ );
|
|
|
7814 |
}
|
|
|
7815 |
else
|
|
|
7816 |
{
|
|
|
7817 |
|
|
|
7818 |
# $bi <= $lastB
|
|
|
7819 |
&$discardBCallback( $ai, $bi++, @_ );
|
|
|
7820 |
}
|
|
|
7821 |
}
|
|
|
7822 |
|
|
|
7823 |
return 1;
|
|
|
7824 |
}
|
|
|
7825 |
|
|
|
7826 |
sub prepare
|
|
|
7827 |
{
|
|
|
7828 |
my $a = shift; # array ref
|
|
|
7829 |
my $keyGen = shift; # code ref
|
|
|
7830 |
|
|
|
7831 |
# set up code ref
|
|
|
7832 |
$keyGen = sub { $_[0] } unless defined($keyGen);
|
|
|
7833 |
|
|
|
7834 |
return scalar _withPositionsOfInInterval( $a, 0, $#$a, $keyGen, @_ );
|
|
|
7835 |
}
|
|
|
7836 |
|
|
|
7837 |
sub LCS
|
|
|
7838 |
{
|
|
|
7839 |
my $a = shift; # array ref
|
|
|
7840 |
my $b = shift; # array ref or hash ref
|
|
|
7841 |
my $matchVector = _longestCommonSubsequence( $a, $b, 0, @_ );
|
|
|
7842 |
my @retval;
|
|
|
7843 |
my $i;
|
|
|
7844 |
for ( $i = 0 ; $i <= $#$matchVector ; $i++ )
|
|
|
7845 |
{
|
|
|
7846 |
if ( defined( $matchVector->[$i] ) )
|
|
|
7847 |
{
|
|
|
7848 |
push ( @retval, $a->[$i] );
|
|
|
7849 |
}
|
|
|
7850 |
}
|
|
|
7851 |
return wantarray ? @retval : \@retval;
|
|
|
7852 |
}
|
|
|
7853 |
|
|
|
7854 |
sub LCS_length
|
|
|
7855 |
{
|
|
|
7856 |
my $a = shift; # array ref
|
|
|
7857 |
my $b = shift; # array ref or hash ref
|
|
|
7858 |
return _longestCommonSubsequence( $a, $b, 1, @_ );
|
|
|
7859 |
}
|
|
|
7860 |
|
|
|
7861 |
sub LCSidx
|
|
|
7862 |
{
|
|
|
7863 |
my $a= shift @_;
|
|
|
7864 |
my $b= shift @_;
|
|
|
7865 |
my $match= _longestCommonSubsequence( $a, $b, 0, @_ );
|
|
|
7866 |
my @am= grep defined $match->[$_], 0..$#$match;
|
|
|
7867 |
my @bm= @{$match}[@am];
|
|
|
7868 |
return \@am, \@bm;
|
|
|
7869 |
}
|
|
|
7870 |
|
|
|
7871 |
sub compact_diff
|
|
|
7872 |
{
|
|
|
7873 |
my $a= shift @_;
|
|
|
7874 |
my $b= shift @_;
|
|
|
7875 |
my( $am, $bm )= LCSidx( $a, $b, @_ );
|
|
|
7876 |
my @cdiff;
|
|
|
7877 |
my( $ai, $bi )= ( 0, 0 );
|
|
|
7878 |
push @cdiff, $ai, $bi;
|
|
|
7879 |
while( 1 ) {
|
|
|
7880 |
while( @$am && $ai == $am->[0] && $bi == $bm->[0] ) {
|
|
|
7881 |
shift @$am;
|
|
|
7882 |
shift @$bm;
|
|
|
7883 |
++$ai, ++$bi;
|
|
|
7884 |
}
|
|
|
7885 |
push @cdiff, $ai, $bi;
|
|
|
7886 |
last if ! @$am;
|
|
|
7887 |
$ai = $am->[0];
|
|
|
7888 |
$bi = $bm->[0];
|
|
|
7889 |
push @cdiff, $ai, $bi;
|
|
|
7890 |
}
|
|
|
7891 |
push @cdiff, 0+@$a, 0+@$b
|
|
|
7892 |
if $ai < @$a || $bi < @$b;
|
|
|
7893 |
return wantarray ? @cdiff : \@cdiff;
|
|
|
7894 |
}
|
|
|
7895 |
|
|
|
7896 |
sub diff
|
|
|
7897 |
{
|
|
|
7898 |
my $a = shift; # array ref
|
|
|
7899 |
my $b = shift; # array ref
|
|
|
7900 |
my $retval = [];
|
|
|
7901 |
my $hunk = [];
|
|
|
7902 |
my $discard = sub {
|
|
|
7903 |
push @$hunk, [ '-', $_[0], $a->[ $_[0] ] ];
|
|
|
7904 |
};
|
|
|
7905 |
my $add = sub {
|
|
|
7906 |
push @$hunk, [ '+', $_[1], $b->[ $_[1] ] ];
|
|
|
7907 |
};
|
|
|
7908 |
my $match = sub {
|
|
|
7909 |
push @$retval, $hunk
|
|
|
7910 |
if 0 < @$hunk;
|
|
|
7911 |
$hunk = []
|
|
|
7912 |
};
|
|
|
7913 |
traverse_sequences( $a, $b,
|
|
|
7914 |
{ MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ );
|
|
|
7915 |
&$match();
|
|
|
7916 |
return wantarray ? @$retval : $retval;
|
|
|
7917 |
}
|
|
|
7918 |
|
|
|
7919 |
sub sdiff
|
|
|
7920 |
{
|
|
|
7921 |
my $a = shift; # array ref
|
|
|
7922 |
my $b = shift; # array ref
|
|
|
7923 |
my $retval = [];
|
|
|
7924 |
my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) };
|
|
|
7925 |
my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) };
|
|
|
7926 |
my $change = sub {
|
|
|
7927 |
push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] );
|
|
|
7928 |
};
|
|
|
7929 |
my $match = sub {
|
|
|
7930 |
push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] );
|
|
|
7931 |
};
|
|
|
7932 |
traverse_balanced(
|
|
|
7933 |
$a,
|
|
|
7934 |
$b,
|
|
|
7935 |
{
|
|
|
7936 |
MATCH => $match,
|
|
|
7937 |
DISCARD_A => $discard,
|
|
|
7938 |
DISCARD_B => $add,
|
|
|
7939 |
CHANGE => $change,
|
|
|
7940 |
},
|
|
|
7941 |
@_
|
|
|
7942 |
);
|
|
|
7943 |
return wantarray ? @$retval : $retval;
|
|
|
7944 |
}
|
|
|
7945 |
|
|
|
7946 |
########################################
|
|
|
7947 |
my $Root= __PACKAGE__;
|
|
|
7948 |
package Algorithm::Diff::_impl;
|
|
|
7949 |
use strict;
|
|
|
7950 |
|
|
|
7951 |
sub _Idx() { 0 } # $me->[_Idx]: Ref to array of hunk indices
|
|
|
7952 |
# 1 # $me->[1]: Ref to first sequence
|
|
|
7953 |
# 2 # $me->[2]: Ref to second sequence
|
|
|
7954 |
sub _End() { 3 } # $me->[_End]: Diff between forward and reverse pos
|
|
|
7955 |
sub _Same() { 4 } # $me->[_Same]: 1 if pos 1 contains unchanged items
|
|
|
7956 |
sub _Base() { 5 } # $me->[_Base]: Added to range's min and max
|
|
|
7957 |
sub _Pos() { 6 } # $me->[_Pos]: Which hunk is currently selected
|
|
|
7958 |
sub _Off() { 7 } # $me->[_Off]: Offset into _Idx for current position
|
|
|
7959 |
sub _Min() { -2 } # Added to _Off to get min instead of max+1
|
|
|
7960 |
|
|
|
7961 |
sub Die
|
|
|
7962 |
{
|
|
|
7963 |
require Carp;
|
|
|
7964 |
Carp::confess( @_ );
|
|
|
7965 |
}
|
|
|
7966 |
|
|
|
7967 |
sub _ChkPos
|
|
|
7968 |
{
|
|
|
7969 |
my( $me )= @_;
|
|
|
7970 |
return if $me->[_Pos];
|
|
|
7971 |
my $meth= ( caller(1) )[3];
|
|
|
7972 |
Die( "Called $meth on 'reset' object" );
|
|
|
7973 |
}
|
|
|
7974 |
|
|
|
7975 |
sub _ChkSeq
|
|
|
7976 |
{
|
|
|
7977 |
my( $me, $seq )= @_;
|
|
|
7978 |
return $seq + $me->[_Off]
|
|
|
7979 |
if 1 == $seq || 2 == $seq;
|
|
|
7980 |
my $meth= ( caller(1) )[3];
|
|
|
7981 |
Die( "$meth: Invalid sequence number ($seq); must be 1 or 2" );
|
|
|
7982 |
}
|
|
|
7983 |
|
|
|
7984 |
sub getObjPkg
|
|
|
7985 |
{
|
|
|
7986 |
my( $us )= @_;
|
|
|
7987 |
return ref $us if ref $us;
|
|
|
7988 |
return $us . "::_obj";
|
|
|
7989 |
}
|
|
|
7990 |
|
|
|
7991 |
sub new
|
|
|
7992 |
{
|
|
|
7993 |
my( $us, $seq1, $seq2, $opts ) = @_;
|
|
|
7994 |
my @args;
|
|
|
7995 |
for( $opts->{keyGen} ) {
|
|
|
7996 |
push @args, $_ if $_;
|
|
|
7997 |
}
|
|
|
7998 |
for( $opts->{keyGenArgs} ) {
|
|
|
7999 |
push @args, @$_ if $_;
|
|
|
8000 |
}
|
|
|
8001 |
my $cdif= Algorithm::Diff::compact_diff( $seq1, $seq2, @args );
|
|
|
8002 |
my $same= 1;
|
|
|
8003 |
if( 0 == $cdif->[2] && 0 == $cdif->[3] ) {
|
|
|
8004 |
$same= 0;
|
|
|
8005 |
splice @$cdif, 0, 2;
|
|
|
8006 |
}
|
|
|
8007 |
my @obj= ( $cdif, $seq1, $seq2 );
|
|
|
8008 |
$obj[_End] = (1+@$cdif)/2;
|
|
|
8009 |
$obj[_Same] = $same;
|
|
|
8010 |
$obj[_Base] = 0;
|
|
|
8011 |
my $me = bless \@obj, $us->getObjPkg();
|
|
|
8012 |
$me->Reset( 0 );
|
|
|
8013 |
return $me;
|
|
|
8014 |
}
|
|
|
8015 |
|
|
|
8016 |
sub Reset
|
|
|
8017 |
{
|
|
|
8018 |
my( $me, $pos )= @_;
|
|
|
8019 |
$pos= int( $pos || 0 );
|
|
|
8020 |
$pos += $me->[_End]
|
|
|
8021 |
if $pos < 0;
|
|
|
8022 |
$pos= 0
|
|
|
8023 |
if $pos < 0 || $me->[_End] <= $pos;
|
|
|
8024 |
$me->[_Pos]= $pos || !1;
|
|
|
8025 |
$me->[_Off]= 2*$pos - 1;
|
|
|
8026 |
return $me;
|
|
|
8027 |
}
|
|
|
8028 |
|
|
|
8029 |
sub Base
|
|
|
8030 |
{
|
|
|
8031 |
my( $me, $base )= @_;
|
|
|
8032 |
my $oldBase= $me->[_Base];
|
|
|
8033 |
$me->[_Base]= 0+$base if defined $base;
|
|
|
8034 |
return $oldBase;
|
|
|
8035 |
}
|
|
|
8036 |
|
|
|
8037 |
sub Copy
|
|
|
8038 |
{
|
|
|
8039 |
my( $me, $pos, $base )= @_;
|
|
|
8040 |
my @obj= @$me;
|
|
|
8041 |
my $you= bless \@obj, ref($me);
|
|
|
8042 |
$you->Reset( $pos ) if defined $pos;
|
|
|
8043 |
$you->Base( $base );
|
|
|
8044 |
return $you;
|
|
|
8045 |
}
|
|
|
8046 |
|
|
|
8047 |
sub Next {
|
|
|
8048 |
my( $me, $steps )= @_;
|
|
|
8049 |
$steps= 1 if ! defined $steps;
|
|
|
8050 |
if( $steps ) {
|
|
|
8051 |
my $pos= $me->[_Pos];
|
|
|
8052 |
my $new= $pos + $steps;
|
|
|
8053 |
$new= 0 if $pos && $new < 0;
|
|
|
8054 |
$me->Reset( $new )
|
|
|
8055 |
}
|
|
|
8056 |
return $me->[_Pos];
|
|
|
8057 |
}
|
|
|
8058 |
|
|
|
8059 |
sub Prev {
|
|
|
8060 |
my( $me, $steps )= @_;
|
|
|
8061 |
$steps= 1 if ! defined $steps;
|
|
|
8062 |
my $pos= $me->Next(-$steps);
|
|
|
8063 |
$pos -= $me->[_End] if $pos;
|
|
|
8064 |
return $pos;
|
|
|
8065 |
}
|
|
|
8066 |
|
|
|
8067 |
sub Diff {
|
|
|
8068 |
my( $me )= @_;
|
|
|
8069 |
$me->_ChkPos();
|
|
|
8070 |
return 0 if $me->[_Same] == ( 1 & $me->[_Pos] );
|
|
|
8071 |
my $ret= 0;
|
|
|
8072 |
my $off= $me->[_Off];
|
|
|
8073 |
for my $seq ( 1, 2 ) {
|
|
|
8074 |
$ret |= $seq
|
|
|
8075 |
if $me->[_Idx][ $off + $seq + _Min ]
|
|
|
8076 |
< $me->[_Idx][ $off + $seq ];
|
|
|
8077 |
}
|
|
|
8078 |
return $ret;
|
|
|
8079 |
}
|
|
|
8080 |
|
|
|
8081 |
sub Min {
|
|
|
8082 |
my( $me, $seq, $base )= @_;
|
|
|
8083 |
$me->_ChkPos();
|
|
|
8084 |
my $off= $me->_ChkSeq($seq);
|
|
|
8085 |
$base= $me->[_Base] if !defined $base;
|
|
|
8086 |
return $base + $me->[_Idx][ $off + _Min ];
|
|
|
8087 |
}
|
|
|
8088 |
|
|
|
8089 |
sub Max {
|
|
|
8090 |
my( $me, $seq, $base )= @_;
|
|
|
8091 |
$me->_ChkPos();
|
|
|
8092 |
my $off= $me->_ChkSeq($seq);
|
|
|
8093 |
$base= $me->[_Base] if !defined $base;
|
|
|
8094 |
return $base + $me->[_Idx][ $off ] -1;
|
|
|
8095 |
}
|
|
|
8096 |
|
|
|
8097 |
sub Range {
|
|
|
8098 |
my( $me, $seq, $base )= @_;
|
|
|
8099 |
$me->_ChkPos();
|
|
|
8100 |
my $off = $me->_ChkSeq($seq);
|
|
|
8101 |
if( !wantarray ) {
|
|
|
8102 |
return $me->[_Idx][ $off ]
|
|
|
8103 |
- $me->[_Idx][ $off + _Min ];
|
|
|
8104 |
}
|
|
|
8105 |
$base= $me->[_Base] if !defined $base;
|
|
|
8106 |
return ( $base + $me->[_Idx][ $off + _Min ] )
|
|
|
8107 |
.. ( $base + $me->[_Idx][ $off ] - 1 );
|
|
|
8108 |
}
|
|
|
8109 |
|
|
|
8110 |
sub Items {
|
|
|
8111 |
my( $me, $seq )= @_;
|
|
|
8112 |
$me->_ChkPos();
|
|
|
8113 |
my $off = $me->_ChkSeq($seq);
|
|
|
8114 |
if( !wantarray ) {
|
|
|
8115 |
return $me->[_Idx][ $off ]
|
|
|
8116 |
- $me->[_Idx][ $off + _Min ];
|
|
|
8117 |
}
|
|
|
8118 |
return
|
|
|
8119 |
@{$me->[$seq]}[
|
|
|
8120 |
$me->[_Idx][ $off + _Min ]
|
|
|
8121 |
.. ( $me->[_Idx][ $off ] - 1 )
|
|
|
8122 |
];
|
|
|
8123 |
}
|
|
|
8124 |
|
|
|
8125 |
sub Same {
|
|
|
8126 |
my( $me )= @_;
|
|
|
8127 |
$me->_ChkPos();
|
|
|
8128 |
return wantarray ? () : 0
|
|
|
8129 |
if $me->[_Same] != ( 1 & $me->[_Pos] );
|
|
|
8130 |
return $me->Items(1);
|
|
|
8131 |
}
|
|
|
8132 |
|
|
|
8133 |
my %getName;
|
|
|
8134 |
BEGIN {
|
|
|
8135 |
%getName= (
|
|
|
8136 |
same => \&Same,
|
|
|
8137 |
diff => \&Diff,
|
|
|
8138 |
base => \&Base,
|
|
|
8139 |
min => \&Min,
|
|
|
8140 |
max => \&Max,
|
|
|
8141 |
range=> \&Range,
|
|
|
8142 |
items=> \&Items, # same thing
|
|
|
8143 |
);
|
|
|
8144 |
}
|
|
|
8145 |
|
|
|
8146 |
sub Get
|
|
|
8147 |
{
|
|
|
8148 |
my $me= shift @_;
|
|
|
8149 |
$me->_ChkPos();
|
|
|
8150 |
my @value;
|
|
|
8151 |
for my $arg ( @_ ) {
|
|
|
8152 |
for my $word ( split ' ', $arg ) {
|
|
|
8153 |
my $meth;
|
|
|
8154 |
if( $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/
|
|
|
8155 |
|| not $meth= $getName{ lc $2 }
|
|
|
8156 |
) {
|
|
|
8157 |
Die( $Root, ", Get: Invalid request ($word)" );
|
|
|
8158 |
}
|
|
|
8159 |
my( $base, $name, $seq )= ( $1, $2, $3 );
|
|
|
8160 |
push @value, scalar(
|
|
|
8161 |
4 == length($name)
|
|
|
8162 |
? $meth->( $me )
|
|
|
8163 |
: $meth->( $me, $seq, $base )
|
|
|
8164 |
);
|
|
|
8165 |
}
|
|
|
8166 |
}
|
|
|
8167 |
if( wantarray ) {
|
|
|
8168 |
return @value;
|
|
|
8169 |
} elsif( 1 == @value ) {
|
|
|
8170 |
return $value[0];
|
|
|
8171 |
}
|
|
|
8172 |
Die( 0+@value, " values requested from ",
|
|
|
8173 |
$Root, "'s Get in scalar context" );
|
|
|
8174 |
}
|
|
|
8175 |
|
|
|
8176 |
|
|
|
8177 |
my $Obj= getObjPkg($Root);
|
|
|
8178 |
no strict 'refs';
|
|
|
8179 |
|
|
|
8180 |
for my $meth ( qw( new getObjPkg ) ) {
|
|
|
8181 |
*{$Root."::".$meth} = \&{$meth};
|
|
|
8182 |
*{$Obj ."::".$meth} = \&{$meth};
|
|
|
8183 |
}
|
|
|
8184 |
for my $meth ( qw(
|
|
|
8185 |
Next Prev Reset Copy Base Diff
|
|
|
8186 |
Same Items Range Min Max Get
|
|
|
8187 |
_ChkPos _ChkSeq
|
|
|
8188 |
) ) {
|
|
|
8189 |
*{$Obj."::".$meth} = \&{$meth};
|
|
|
8190 |
}
|
|
|
8191 |
|
|
|
8192 |
1;
|
|
|
8193 |
# This version released by Tye McQueen (http://perlmonks.org/?node=tye).
|
|
|
8194 |
#
|
|
|
8195 |
# =head1 LICENSE
|
|
|
8196 |
#
|
|
|
8197 |
# Parts Copyright (c) 2000-2004 Ned Konz. All rights reserved.
|
|
|
8198 |
# Parts by Tye McQueen.
|
|
|
8199 |
#
|
|
|
8200 |
# This program is free software; you can redistribute it and/or modify it
|
|
|
8201 |
# under the same terms as Perl.
|
|
|
8202 |
#
|
|
|
8203 |
# =head1 MAILING LIST
|
|
|
8204 |
#
|
|
|
8205 |
# Mark-Jason still maintains a mailing list. To join a low-volume mailing
|
|
|
8206 |
# list for announcements related to diff and Algorithm::Diff, send an
|
|
|
8207 |
# empty mail message to mjd-perl-diff-request@plover.com.
|
|
|
8208 |
# =head1 CREDITS
|
|
|
8209 |
#
|
|
|
8210 |
# Versions through 0.59 (and much of this documentation) were written by:
|
|
|
8211 |
#
|
|
|
8212 |
# Mark-Jason Dominus, mjd-perl-diff@plover.com
|
|
|
8213 |
#
|
|
|
8214 |
# This version borrows some documentation and routine names from
|
|
|
8215 |
# Mark-Jason's, but Diff.pm's code was completely replaced.
|
|
|
8216 |
#
|
|
|
8217 |
# This code was adapted from the Smalltalk code of Mario Wolczko
|
|
|
8218 |
# <mario@wolczko.com>, which is available at
|
|
|
8219 |
# ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st
|
|
|
8220 |
#
|
|
|
8221 |
# C<sdiff> and C<traverse_balanced> were written by Mike Schilli
|
|
|
8222 |
# <m@perlmeister.com>.
|
|
|
8223 |
#
|
|
|
8224 |
# The algorithm is that described in
|
|
|
8225 |
# I<A Fast Algorithm for Computing Longest Common Subsequences>,
|
|
|
8226 |
# CACM, vol.20, no.5, pp.350-353, May 1977, with a few
|
|
|
8227 |
# minor improvements to improve the speed.
|
|
|
8228 |
#
|
|
|
8229 |
# Much work was done by Ned Konz (perl@bike-nomad.com).
|
|
|
8230 |
#
|
|
|
8231 |
# The OO interface and some other changes are by Tye McQueen.
|
|
|
8232 |
#
|
|
|
8233 |
EOAlgDiff
|
|
|
8234 |
# 2}}}
|
|
|
8235 |
my $problems = 0;
|
|
|
8236 |
$HAVE_Algorith_Diff = 0;
|
|
|
8237 |
my $dir = "";
|
|
|
8238 |
if ($opt_sdir) {
|
|
|
8239 |
++$TEMP_OFF;
|
|
|
8240 |
$dir = "$opt_sdir/$TEMP_OFF";
|
|
|
8241 |
File::Path::rmtree($dir) if is_dir($dir);
|
|
|
8242 |
File::Path::mkpath($dir) unless is_dir($dir);
|
|
|
8243 |
} else {
|
|
|
8244 |
# let File::Temp create a suitable temporary directory
|
|
|
8245 |
$dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
|
|
|
8246 |
$TEMP_INST{ $dir } = "Algorithm::Diff";
|
|
|
8247 |
}
|
|
|
8248 |
print "Using temp dir [$dir] to install Algorithm::Diff\n" if $opt_v;
|
|
|
8249 |
my $Algorithm_dir = "$dir/Algorithm";
|
|
|
8250 |
my $Algorithm_Diff_dir = "$dir/Algorithm/Diff";
|
|
|
8251 |
mkdir $Algorithm_dir ;
|
|
|
8252 |
mkdir $Algorithm_Diff_dir;
|
|
|
8253 |
|
|
|
8254 |
my $OUT = new IO::File "$dir/Algorithm/Diff.pm", "w";
|
|
|
8255 |
if (defined $OUT) {
|
|
|
8256 |
print $OUT $Algorithm_Diff_Contents;
|
|
|
8257 |
$OUT->close;
|
|
|
8258 |
} else {
|
|
|
8259 |
warn "Failed to install Algorithm/Diff.pm\n";
|
|
|
8260 |
$problems = 1;
|
|
|
8261 |
}
|
|
|
8262 |
|
|
|
8263 |
push @INC, $dir; # between this & Regexp::Common only need to do once
|
|
|
8264 |
eval "use Algorithm::Diff qw / sdiff /";
|
|
|
8265 |
$HAVE_Algorith_Diff = 1 unless $problems;
|
|
|
8266 |
} # 1}}}
|
|
|
8267 |
sub call_regexp_common { # {{{1
|
|
|
8268 |
my ($ra_lines, $language ) = @_;
|
|
|
8269 |
print "-> call_regexp_common\n" if $opt_v > 2;
|
|
|
8270 |
|
|
|
8271 |
Install_Regexp_Common() unless $HAVE_Rexexp_Common;
|
|
|
8272 |
|
|
|
8273 |
my $all_lines = join("", @{$ra_lines});
|
|
|
8274 |
|
|
|
8275 |
no strict 'vars';
|
|
|
8276 |
# otherwise get:
|
|
|
8277 |
# Global symbol "%RE" requires explicit package name at cloc line xx.
|
|
|
8278 |
if ($all_lines =~ $RE{comment}{$language}) {
|
|
|
8279 |
# Suppress "Use of uninitialized value in regexp compilation" that
|
|
|
8280 |
# pops up when $1 is undefined--happens if there's a bug in the $RE
|
|
|
8281 |
# This Pascal comment will trigger it:
|
|
|
8282 |
# (* This is { another } test. **)
|
|
|
8283 |
# Curiously, testing for "defined $1" breaks the substitution.
|
|
|
8284 |
no warnings;
|
|
|
8285 |
# remove comments
|
|
|
8286 |
$all_lines =~ s/$1//g;
|
|
|
8287 |
}
|
|
|
8288 |
# a bogus use of %RE to avoid:
|
|
|
8289 |
# Name "main::RE" used only once: possible typo at cloc line xx.
|
|
|
8290 |
print scalar keys %RE if $opt_v < -20;
|
|
|
8291 |
#?#print "$all_lines\n";
|
|
|
8292 |
print "<- call_regexp_common\n" if $opt_v > 2;
|
|
|
8293 |
return split("\n", $all_lines);
|
|
|
8294 |
} # 1}}}
|
|
|
8295 |
sub plural_form { # {{{1
|
|
|
8296 |
# For getting the right plural form on some English nouns.
|
|
|
8297 |
my $n = shift @_;
|
|
|
8298 |
if ($n == 1) { return ( 1, "" ); }
|
|
|
8299 |
else { return ($n, "s"); }
|
|
|
8300 |
} # 1}}}
|
|
|
8301 |
sub matlab_or_objective_C { # {{{1
|
|
|
8302 |
# Decide if code is MATLAB, Objective C, MUMPS, or Mercury
|
|
|
8303 |
my ($file , # in
|
|
|
8304 |
$rh_Err , # in hash of error codes
|
|
|
8305 |
$raa_errors , # out
|
|
|
8306 |
$rs_language , # out
|
|
|
8307 |
) = @_;
|
|
|
8308 |
print "-> matlab_or_objective_C\n" if $opt_v > 2;
|
|
|
8309 |
# matlab markers:
|
|
|
8310 |
# first line starts with "function"
|
|
|
8311 |
# some lines start with "%"
|
|
|
8312 |
# high marks for lines that start with [
|
|
|
8313 |
#
|
|
|
8314 |
# Objective C markers:
|
|
|
8315 |
# must have at least two brace characters, { }
|
|
|
8316 |
# has /* ... */ style comments
|
|
|
8317 |
# some lines start with @
|
|
|
8318 |
# some lines start with #include
|
|
|
8319 |
#
|
|
|
8320 |
# MUMPS:
|
|
|
8321 |
# has ; comment markers
|
|
|
8322 |
# do not match: \w+\s*=\s*\w
|
|
|
8323 |
# lines begin with \s*\.?\w+\s+\w
|
|
|
8324 |
# high marks for lines that start with \s*K\s+ or \s*Kill\s+
|
|
|
8325 |
#
|
|
|
8326 |
# Mercury:
|
|
|
8327 |
# any line that begins with :- immediately triggers this
|
|
|
8328 |
|
|
|
8329 |
${$rs_language} = "";
|
|
|
8330 |
my $IN = new IO::File $file, "r";
|
|
|
8331 |
if (!defined $IN) {
|
|
|
8332 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
8333 |
return;
|
|
|
8334 |
}
|
|
|
8335 |
|
|
|
8336 |
my $DEBUG = 0;
|
|
|
8337 |
|
|
|
8338 |
my $matlab_points = 0;
|
|
|
8339 |
my $objective_C_points = 0;
|
|
|
8340 |
my $mumps_points = 0;
|
|
|
8341 |
my $mercury_points = 0;
|
|
|
8342 |
my $has_braces = 0;
|
|
|
8343 |
while (<$IN>) {
|
|
|
8344 |
++$has_braces if $_ =~ m/[{}]/;
|
|
|
8345 |
#print "LINE $. has_braces=$has_braces\n";
|
|
|
8346 |
++$mumps_points if $. == 1 and m{^[A-Z]};
|
|
|
8347 |
if (m{^\s*/\*} or m {^\s*//}) { # /* or //
|
|
|
8348 |
$objective_C_points += 5;
|
|
|
8349 |
$matlab_points -= 5;
|
|
|
8350 |
printf ".m: /*|// obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
8351 |
} elsif (m{^:-\s+}) { # gotta be mercury
|
|
|
8352 |
$mercury_points = 1000;
|
|
|
8353 |
last;
|
|
|
8354 |
} elsif (m{\w+\s*=\s*\[}) { # matrix assignment, very matlab
|
|
|
8355 |
$matlab_points += 5;
|
|
|
8356 |
printf ".m: \\w=[ obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
8357 |
} elsif (m{^\s*\w+\s*=\s*}) { # definitely not MUMPS
|
|
|
8358 |
--$mumps_points;
|
|
|
8359 |
printf ".m: \\w= obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
8360 |
} elsif (m{^\s*\.?(\w)\s+(\w)} and $1 !~ /\d/ and $2 !~ /\d/) {
|
|
|
8361 |
++$mumps_points;
|
|
|
8362 |
printf ".m: \\w \\w obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
8363 |
} elsif (m{^\s*;}) {
|
|
|
8364 |
++$mumps_points;
|
|
|
8365 |
printf ".m: ; obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
8366 |
} elsif (m{^\s*#(include|import)}) {
|
|
|
8367 |
# Objective C without a doubt
|
|
|
8368 |
$objective_C_points = 1000;
|
|
|
8369 |
$matlab_points = 0;
|
|
|
8370 |
printf ".m: #includ obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
8371 |
$has_braces = 2;
|
|
|
8372 |
last;
|
|
|
8373 |
} elsif (m{^\s*@(interface|implementation|protocol|public|protected|private|end)\s}o) {
|
|
|
8374 |
# Objective C without a doubt
|
|
|
8375 |
$objective_C_points = 1000;
|
|
|
8376 |
$matlab_points = 0;
|
|
|
8377 |
printf ".m: keyword obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
8378 |
last;
|
|
|
8379 |
} elsif (m{^\s*\[}) { # line starts with [ -- very matlab
|
|
|
8380 |
$matlab_points += 5;
|
|
|
8381 |
printf ".m: [ obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
8382 |
} elsif (m{^\sK(ill)?\s+}) {
|
|
|
8383 |
$mumps_points += 5;
|
|
|
8384 |
printf ".m: Kill obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
8385 |
} elsif (m{^\s*function}) {
|
|
|
8386 |
--$objective_C_points;
|
|
|
8387 |
++$matlab_points;
|
|
|
8388 |
printf ".m: funct obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
8389 |
} elsif (m{^\s*%}) { # %
|
|
|
8390 |
# matlab commented line
|
|
|
8391 |
--$objective_C_points;
|
|
|
8392 |
++$matlab_points;
|
|
|
8393 |
printf ".m: pcent obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
8394 |
}
|
|
|
8395 |
}
|
|
|
8396 |
$IN->close;
|
|
|
8397 |
printf "END LOOP obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
8398 |
|
|
|
8399 |
# next heuristic is unreliable for small files
|
|
|
8400 |
# $objective_C_points = -9.9e20 unless $has_braces >= 2;
|
|
|
8401 |
|
|
|
8402 |
my %points = ( 'MATLAB' => $matlab_points ,
|
|
|
8403 |
'MUMPS' => $mumps_points ,
|
|
|
8404 |
'Objective C' => $objective_C_points,
|
|
|
8405 |
'Mercury' => $mercury_points , );
|
|
|
8406 |
|
|
|
8407 |
${$rs_language} = (sort { $points{$b} <=> $points{$a}} keys %points)[0];
|
|
|
8408 |
|
|
|
8409 |
print "<- matlab_or_objective_C($file: matlab=$matlab_points, C=$objective_C_points, mumps=$mumps_points, mercury=$mercury_points) => ${$rs_language}\n"
|
|
|
8410 |
if $opt_v > 2;
|
|
|
8411 |
|
|
|
8412 |
} # 1}}}
|
|
|
8413 |
sub Lisp_or_OpenCL { # {{{1
|
|
|
8414 |
my ($file , # in
|
|
|
8415 |
$rh_Err , # in hash of error codes
|
|
|
8416 |
$raa_errors , # out
|
|
|
8417 |
) = @_;
|
|
|
8418 |
|
|
|
8419 |
print "-> Lisp_or_OpenCL\n" if $opt_v > 2;
|
|
|
8420 |
|
|
|
8421 |
my $lang = undef;
|
|
|
8422 |
my $IN = new IO::File $file, "r";
|
|
|
8423 |
if (!defined $IN) {
|
|
|
8424 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
8425 |
return $lang;
|
|
|
8426 |
}
|
|
|
8427 |
my $lisp_points = 0;
|
|
|
8428 |
my $opcl_points = 0;
|
|
|
8429 |
while (<$IN>) {
|
|
|
8430 |
++$lisp_points if /^\s*;/;
|
|
|
8431 |
++$lisp_points if /\((def|eval|require|export|let|loop|dec|format)/;
|
|
|
8432 |
++$opcl_points if /^\s*(int|float|const|{)/;
|
|
|
8433 |
}
|
|
|
8434 |
$IN->close;
|
|
|
8435 |
# print "lisp_points=$lisp_points opcl_points=$opcl_points\n";
|
|
|
8436 |
if ($lisp_points > $opcl_points) {
|
|
|
8437 |
$lang = "Lisp";
|
|
|
8438 |
} else {
|
|
|
8439 |
$lang = "OpenCL";
|
|
|
8440 |
}
|
|
|
8441 |
|
|
|
8442 |
print "<- Lisp_or_OpenCL\n" if $opt_v > 2;
|
|
|
8443 |
return $lang;
|
|
|
8444 |
} # 1}}}
|
|
|
8445 |
sub Lisp_or_Julia { # {{{1
|
|
|
8446 |
my ($file , # in
|
|
|
8447 |
$rh_Err , # in hash of error codes
|
|
|
8448 |
$raa_errors , # out
|
|
|
8449 |
) = @_;
|
|
|
8450 |
|
|
|
8451 |
print "-> Lisp_or_Julia\n" if $opt_v > 2;
|
|
|
8452 |
|
|
|
8453 |
my $lang = undef;
|
|
|
8454 |
my $IN = new IO::File $file, "r";
|
|
|
8455 |
if (!defined $IN) {
|
|
|
8456 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
8457 |
return $lang;
|
|
|
8458 |
}
|
|
|
8459 |
my $lisp_points = 0;
|
|
|
8460 |
my $julia_points = 0;
|
|
|
8461 |
while (<$IN>) {
|
|
|
8462 |
++$lisp_points if /^\s*;/;
|
|
|
8463 |
++$lisp_points if /\((def|eval|require|export|let|loop|dec|format)/;
|
|
|
8464 |
++$julia_points if /^\s*(function|end|println|for|while)/;
|
|
|
8465 |
}
|
|
|
8466 |
$IN->close;
|
|
|
8467 |
# print "lisp_points=$lisp_points julia_points=$julia_points\n";
|
|
|
8468 |
if ($lisp_points > $julia_points) {
|
|
|
8469 |
$lang = "Lisp";
|
|
|
8470 |
} else {
|
|
|
8471 |
$lang = "Julia";
|
|
|
8472 |
}
|
|
|
8473 |
|
|
|
8474 |
print "<- Lisp_or_Julia\n" if $opt_v > 2;
|
|
|
8475 |
return $lang;
|
|
|
8476 |
} # 1}}}
|
|
|
8477 |
sub Perl_or_Prolog { # {{{1
|
|
|
8478 |
my ($file , # in
|
|
|
8479 |
$rh_Err , # in hash of error codes
|
|
|
8480 |
$raa_errors , # out
|
|
|
8481 |
) = @_;
|
|
|
8482 |
|
|
|
8483 |
print "-> Perl_or_Prolog\n" if $opt_v > 2;
|
|
|
8484 |
|
|
|
8485 |
my $lang = undef;
|
|
|
8486 |
my $IN = new IO::File $file, "r";
|
|
|
8487 |
if (!defined $IN) {
|
|
|
8488 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
8489 |
return $lang;
|
|
|
8490 |
}
|
|
|
8491 |
my $perl_points = 0;
|
|
|
8492 |
my $prolog_points = 0;
|
|
|
8493 |
while (<$IN>) {
|
|
|
8494 |
++$perl_points if /;\s*$/;
|
|
|
8495 |
++$perl_points if /({|})/;
|
|
|
8496 |
++$perl_points if /^\s*sub\s+/;
|
|
|
8497 |
++$prolog_points if /\.\s*$/;
|
|
|
8498 |
++$prolog_points if /:-/;
|
|
|
8499 |
}
|
|
|
8500 |
$IN->close;
|
|
|
8501 |
# print "perl_points=$perl_points prolog_points=$prolog_points\n";
|
|
|
8502 |
if ($perl_points > $prolog_points) {
|
|
|
8503 |
$lang = "Perl";
|
|
|
8504 |
} else {
|
|
|
8505 |
$lang = "Prolog";
|
|
|
8506 |
}
|
|
|
8507 |
|
|
|
8508 |
print "<- Perl_or_Prolog\n" if $opt_v > 2;
|
|
|
8509 |
return $lang;
|
|
|
8510 |
} # 1}}}
|
|
|
8511 |
sub IDL_or_QtProject { # {{{1
|
|
|
8512 |
# also Prolog
|
|
|
8513 |
my ($file , # in
|
|
|
8514 |
$rh_Err , # in hash of error codes
|
|
|
8515 |
$raa_errors , # out
|
|
|
8516 |
) = @_;
|
|
|
8517 |
|
|
|
8518 |
print "-> IDL_or_QtProject($file)\n" if $opt_v > 2;
|
|
|
8519 |
|
|
|
8520 |
my $lang = undef;
|
|
|
8521 |
my $IN = new IO::File $file, "r";
|
|
|
8522 |
if (!defined $IN) {
|
|
|
8523 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
8524 |
return $lang;
|
|
|
8525 |
}
|
|
|
8526 |
my $idl_points = 0;
|
|
|
8527 |
my $qtproj_points = 0;
|
|
|
8528 |
my $prolog_points = 0;
|
|
|
8529 |
while (<$IN>) {
|
|
|
8530 |
++$idl_points if /^\s*;/;
|
|
|
8531 |
++$idl_points if /plot\(/i;
|
|
|
8532 |
++$qtproj_points if /^\s*(qt|configs|sources)\s*\+?=/i;
|
|
|
8533 |
++$prolog_points if /\.\s*$/;
|
|
|
8534 |
++$prolog_points if /:-/;
|
|
|
8535 |
}
|
|
|
8536 |
$IN->close;
|
|
|
8537 |
# print "idl_points=$idl_points qtproj_points=$qtproj_points\n";
|
|
|
8538 |
|
|
|
8539 |
if ($idl_points > $qtproj_points) {
|
|
|
8540 |
$lang = "IDL";
|
|
|
8541 |
} else {
|
|
|
8542 |
$lang = "Qt Project";
|
|
|
8543 |
}
|
|
|
8544 |
|
|
|
8545 |
my %points = ( 'IDL' => $idl_points ,
|
|
|
8546 |
'Qt Project' => $qtproj_points ,
|
|
|
8547 |
'Prolog' => $prolog_points , );
|
|
|
8548 |
|
|
|
8549 |
$lang = (sort { $points{$b} <=> $points{$a}} keys %points)[0];
|
|
|
8550 |
|
|
|
8551 |
print "<- IDL_or_QtProject(idl_points=$idl_points, ",
|
|
|
8552 |
"qtproj_points=$qtproj_points, prolog_points=$prolog_points)\n"
|
|
|
8553 |
if $opt_v > 2;
|
|
|
8554 |
return $lang;
|
|
|
8555 |
} # 1}}}
|
|
|
8556 |
sub Ant_or_XML { # {{{1
|
|
|
8557 |
my ($file , # in
|
|
|
8558 |
$rh_Err , # in hash of error codes
|
|
|
8559 |
$raa_errors , # out
|
|
|
8560 |
) = @_;
|
|
|
8561 |
|
|
|
8562 |
print "-> Ant_or_XML($file)\n" if $opt_v > 2;
|
|
|
8563 |
|
|
|
8564 |
my $lang = "XML";
|
|
|
8565 |
my $IN = new IO::File $file, "r";
|
|
|
8566 |
if (!defined $IN) {
|
|
|
8567 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
8568 |
return $lang;
|
|
|
8569 |
}
|
|
|
8570 |
my $Ant_points = 0;
|
|
|
8571 |
my $XML_points = 1;
|
|
|
8572 |
while (<$IN>) {
|
|
|
8573 |
if (/^\s*<project\s+/) {
|
|
|
8574 |
++$Ant_points ;
|
|
|
8575 |
--$XML_points ;
|
|
|
8576 |
}
|
|
|
8577 |
if (/xmlns:artifact="antlib:org.apache.maven.artifact.ant"/) {
|
|
|
8578 |
++$Ant_points ;
|
|
|
8579 |
--$XML_points ;
|
|
|
8580 |
}
|
|
|
8581 |
}
|
|
|
8582 |
$IN->close;
|
|
|
8583 |
|
|
|
8584 |
if ($XML_points >= $Ant_points) {
|
|
|
8585 |
# tie or better goes to XML
|
|
|
8586 |
$lang = "XML";
|
|
|
8587 |
} else {
|
|
|
8588 |
$lang = "Ant";
|
|
|
8589 |
}
|
|
|
8590 |
|
|
|
8591 |
print "<- Ant_or_XML($lang)\n" if $opt_v > 2;
|
|
|
8592 |
return $lang;
|
|
|
8593 |
} # 1}}}
|
|
|
8594 |
sub Maven_or_XML { # {{{1
|
|
|
8595 |
my ($file , # in
|
|
|
8596 |
$rh_Err , # in hash of error codes
|
|
|
8597 |
$raa_errors , # out
|
|
|
8598 |
) = @_;
|
|
|
8599 |
|
|
|
8600 |
print "-> Maven_or_XML($file)\n" if $opt_v > 2;
|
|
|
8601 |
|
|
|
8602 |
my $lang = "XML";
|
|
|
8603 |
my $IN = new IO::File $file, "r";
|
|
|
8604 |
if (!defined $IN) {
|
|
|
8605 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
8606 |
return $lang;
|
|
|
8607 |
}
|
|
|
8608 |
my $Mvn_points = 0;
|
|
|
8609 |
my $XML_points = 1;
|
|
|
8610 |
while (<$IN>) {
|
|
|
8611 |
if (/^\s*<project\s+/) {
|
|
|
8612 |
++$Mvn_points ;
|
|
|
8613 |
--$XML_points ;
|
|
|
8614 |
}
|
|
|
8615 |
if (m{xmlns="http://maven.apache.org/POM/}) {
|
|
|
8616 |
++$Mvn_points ;
|
|
|
8617 |
--$XML_points ;
|
|
|
8618 |
}
|
|
|
8619 |
}
|
|
|
8620 |
$IN->close;
|
|
|
8621 |
|
|
|
8622 |
if ($XML_points >= $Mvn_points) {
|
|
|
8623 |
# tie or better goes to XML
|
|
|
8624 |
$lang = "XML";
|
|
|
8625 |
} else {
|
|
|
8626 |
$lang = "Maven";
|
|
|
8627 |
}
|
|
|
8628 |
|
|
|
8629 |
print "<- Maven_or_XML($lang)\n" if $opt_v > 2;
|
|
|
8630 |
return $lang;
|
|
|
8631 |
} # 1}}}
|
|
|
8632 |
sub pascal_or_puppet { # {{{1
|
|
|
8633 |
# Decide if code is Pascal or Puppet manifest
|
|
|
8634 |
my ($file , # in
|
|
|
8635 |
$rh_Err , # in hash of error codes
|
|
|
8636 |
$raa_errors , # out
|
|
|
8637 |
$rs_language , # out
|
|
|
8638 |
) = @_;
|
|
|
8639 |
|
|
|
8640 |
print "-> pascal_or_puppet\n" if $opt_v > 2;
|
|
|
8641 |
|
|
|
8642 |
${$rs_language} = "";
|
|
|
8643 |
my $IN = new IO::File $file, "r";
|
|
|
8644 |
if (!defined $IN) {
|
|
|
8645 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
8646 |
return;
|
|
|
8647 |
}
|
|
|
8648 |
|
|
|
8649 |
my $DEBUG = 0;
|
|
|
8650 |
my $pascal_points = 0;
|
|
|
8651 |
my $puppet_points = 0;
|
|
|
8652 |
|
|
|
8653 |
while (<$IN>) {
|
|
|
8654 |
++$pascal_points if /\bprogram\s+[A-Za-z]/i;
|
|
|
8655 |
++$pascal_points if /\bunit\s+[A-Za-z]/i;
|
|
|
8656 |
++$pascal_points if /\bmodule\s+[A-Za-z]/i;
|
|
|
8657 |
++$pascal_points if /\bprocedure\b/i;
|
|
|
8658 |
++$pascal_points if /\bfunction\b/i;
|
|
|
8659 |
++$pascal_points if /^\s*interface\s+/i;
|
|
|
8660 |
++$pascal_points if /^\s*implementation\s+/i;
|
|
|
8661 |
++$pascal_points if /\bbegin\b/i;
|
|
|
8662 |
++$pascal_points if /\bend\b/i;
|
|
|
8663 |
|
|
|
8664 |
++$puppet_points if /^\s*class\s+/;
|
|
|
8665 |
++$puppet_points if /^\s*case\s+/;
|
|
|
8666 |
++$puppet_points if /^\s*package\s+/;
|
|
|
8667 |
++$puppet_points if /^\s*file\s+/;
|
|
|
8668 |
++$puppet_points if /^\s*service\s+/;
|
|
|
8669 |
}
|
|
|
8670 |
$IN->close;
|
|
|
8671 |
|
|
|
8672 |
print "<- pascal_or_puppet(pascal=$pascal_points, puppet=$puppet_points)\n"
|
|
|
8673 |
if $opt_v > 2;
|
|
|
8674 |
|
|
|
8675 |
if ($pascal_points > $puppet_points) {
|
|
|
8676 |
${$rs_language} = "Pascal";
|
|
|
8677 |
} else {
|
|
|
8678 |
${$rs_language} = "Puppet";
|
|
|
8679 |
}
|
|
|
8680 |
|
|
|
8681 |
} # 1}}}
|
|
|
8682 |
sub html_colored_text { # {{{1
|
|
|
8683 |
# http://www.pagetutor.com/pagetutor/makapage/pics/net216-2.gif
|
|
|
8684 |
my ($color, $text) = @_;
|
|
|
8685 |
#?#die "html_colored_text($text)";
|
|
|
8686 |
if ($color =~ /^red$/i) {
|
|
|
8687 |
$color = "#ff0000";
|
|
|
8688 |
} elsif ($color =~ /^green$/i) {
|
|
|
8689 |
$color = "#00ff00";
|
|
|
8690 |
} elsif ($color =~ /^blue$/i) {
|
|
|
8691 |
$color = "#0000ff";
|
|
|
8692 |
} elsif ($color =~ /^grey$/i) {
|
|
|
8693 |
$color = "#cccccc";
|
|
|
8694 |
}
|
|
|
8695 |
# return "" unless $text;
|
|
|
8696 |
return '<font color="' . $color . '">' . html_metachars($text) . "</font>";
|
|
|
8697 |
} # 1}}}
|
|
|
8698 |
sub xml_metachars { # {{{1
|
|
|
8699 |
# http://en.wikipedia.org/wiki/Character_encodings_in_HTML#XML_character_references
|
|
|
8700 |
my ($string, ) = shift @_;
|
|
|
8701 |
|
|
|
8702 |
my @in_chars = split(//, $string);
|
|
|
8703 |
my @out_chars = ();
|
|
|
8704 |
foreach my $c (@in_chars) {
|
|
|
8705 |
if ($c eq '&') { push @out_chars, '&'
|
|
|
8706 |
} elsif ($c eq '<') { push @out_chars, '<'
|
|
|
8707 |
} elsif ($c eq '>') { push @out_chars, '>'
|
|
|
8708 |
} elsif ($c eq '"') { push @out_chars, '"'
|
|
|
8709 |
} elsif ($c eq "'") { push @out_chars, '''
|
|
|
8710 |
} else {
|
|
|
8711 |
push @out_chars, $c;
|
|
|
8712 |
}
|
|
|
8713 |
}
|
|
|
8714 |
return join "", @out_chars;
|
|
|
8715 |
} # 1}}}
|
|
|
8716 |
sub html_metachars { # {{{1
|
|
|
8717 |
# Replace HTML metacharacters with their printable forms.
|
|
|
8718 |
# Future: use HTML-Encoder-0.00_04/lib/HTML/Encoder.pm
|
|
|
8719 |
# from Fabiano Reese Righetti's HTML::Encoder module if
|
|
|
8720 |
# this subroutine proves to be too simplistic.
|
|
|
8721 |
my ($string, ) = shift @_;
|
|
|
8722 |
|
|
|
8723 |
my @in_chars = split(//, $string);
|
|
|
8724 |
my @out_chars = ();
|
|
|
8725 |
foreach my $c (@in_chars) {
|
|
|
8726 |
if ($c eq '<') {
|
|
|
8727 |
push @out_chars, '<'
|
|
|
8728 |
} elsif ($c eq '>') {
|
|
|
8729 |
push @out_chars, '>'
|
|
|
8730 |
} elsif ($c eq '&') {
|
|
|
8731 |
push @out_chars, '&'
|
|
|
8732 |
} else {
|
|
|
8733 |
push @out_chars, $c;
|
|
|
8734 |
}
|
|
|
8735 |
}
|
|
|
8736 |
return join "", @out_chars;
|
|
|
8737 |
} # 1}}}
|
|
|
8738 |
sub test_alg_diff { # {{{1
|
|
|
8739 |
my ($file_1 ,
|
|
|
8740 |
$file_2 )
|
|
|
8741 |
= @_;
|
|
|
8742 |
my $fh_1 = new IO::File $file_1, "r";
|
|
|
8743 |
die "Unable to read $file_1: $!\n" unless defined $fh_1;
|
|
|
8744 |
chomp(my @lines_1 = <$fh_1>);
|
|
|
8745 |
$fh_1->close;
|
|
|
8746 |
|
|
|
8747 |
my $fh_2 = new IO::File $file_2, "r";
|
|
|
8748 |
die "Unable to read $file_2: $!\n" unless defined $fh_2;
|
|
|
8749 |
chomp(my @lines_2 = <$fh_2>);
|
|
|
8750 |
$fh_2->close;
|
|
|
8751 |
|
|
|
8752 |
my $n_no_change = 0;
|
|
|
8753 |
my $n_modified = 0;
|
|
|
8754 |
my $n_added = 0;
|
|
|
8755 |
my $n_deleted = 0;
|
|
|
8756 |
my @min_sdiff = ();
|
|
|
8757 |
my $NN = chr(27) . "[0m"; # normal
|
|
|
8758 |
my $BB = chr(27) . "[1m"; # bold
|
|
|
8759 |
|
|
|
8760 |
my @sdiffs = sdiff( \@lines_1, \@lines_2 );
|
|
|
8761 |
foreach my $entry (@sdiffs) {
|
|
|
8762 |
my ($out_1, $out_2) = ('', '');
|
|
|
8763 |
if ($entry->[0] eq 'u') {
|
|
|
8764 |
++$n_no_change;
|
|
|
8765 |
# $out_1 = $entry->[1];
|
|
|
8766 |
# $out_2 = $entry->[2];
|
|
|
8767 |
next;
|
|
|
8768 |
}
|
|
|
8769 |
# push @min_sdiff, $entry;
|
|
|
8770 |
if ($entry->[0] eq 'c') {
|
|
|
8771 |
++$n_modified;
|
|
|
8772 |
($out_1, $out_2) = diff_two_strings($entry->[1], $entry->[2]);
|
|
|
8773 |
$out_1 =~ s/\cA(\w)/${BB}$1${NN}/g;
|
|
|
8774 |
$out_2 =~ s/\cA(\w)/${BB}$1${NN}/g;
|
|
|
8775 |
# $out_1 =~ s/\cA//g;
|
|
|
8776 |
# $out_2 =~ s/\cA//g;
|
|
|
8777 |
} elsif ($entry->[0] eq '+') {
|
|
|
8778 |
++$n_added;
|
|
|
8779 |
$out_1 = $entry->[1];
|
|
|
8780 |
$out_2 = $entry->[2];
|
|
|
8781 |
} elsif ($entry->[0] eq '-') {
|
|
|
8782 |
++$n_deleted;
|
|
|
8783 |
$out_1 = $entry->[1];
|
|
|
8784 |
$out_2 = $entry->[2];
|
|
|
8785 |
} elsif ($entry->[0] eq 'u') {
|
|
|
8786 |
} else { die "unknown entry->[0]=[$entry->[0]]\n"; }
|
|
|
8787 |
printf "%-80s | %s\n", $out_1, $out_2;
|
|
|
8788 |
}
|
|
|
8789 |
|
|
|
8790 |
# foreach my $entry (@min_sdiff) {
|
|
|
8791 |
# printf "DIFF: %s %s %s\n", @{$entry};
|
|
|
8792 |
# }
|
|
|
8793 |
} # 1}}}
|
|
|
8794 |
sub write_comments_to_html { # {{{1
|
|
|
8795 |
my ($filename , # in
|
|
|
8796 |
$rah_diff_L , # in see routine array_diff() for explanation
|
|
|
8797 |
$rah_diff_R , # in see routine array_diff() for explanation
|
|
|
8798 |
$rh_blank , # in location and counts of blank lines
|
|
|
8799 |
) = @_;
|
|
|
8800 |
|
|
|
8801 |
print "-> write_comments_to_html($filename)\n" if $opt_v > 2;
|
|
|
8802 |
my $file = $filename . ".html";
|
|
|
8803 |
#use Data::Dumper;
|
|
|
8804 |
#print Dumper("rah_diff_L", $rah_diff_L, "rah_diff_R", $rah_diff_R);
|
|
|
8805 |
my $OUT = new IO::File $file, "w";
|
|
|
8806 |
if (!defined $OUT) {
|
|
|
8807 |
warn "Unable to write to $file\n";
|
|
|
8808 |
print "<- write_comments_to_html\n" if $opt_v > 2;
|
|
|
8809 |
return;
|
|
|
8810 |
}
|
|
|
8811 |
|
|
|
8812 |
my $approx_line_count = scalar @{$rah_diff_L};
|
|
|
8813 |
my $n_digits = 1 + int(log($approx_line_count)/2.30258509299405); # log_10
|
|
|
8814 |
|
|
|
8815 |
my $html_out = html_header($filename);
|
|
|
8816 |
|
|
|
8817 |
my $comment_line_number = 0;
|
|
|
8818 |
for (my $i = 0; $i < scalar @{$rah_diff_R}; $i++) {
|
|
|
8819 |
if (defined $rh_blank->{$i}) {
|
|
|
8820 |
foreach (1..$rh_blank->{$i}) {
|
|
|
8821 |
$html_out .= "<!-- blank -->\n";
|
|
|
8822 |
}
|
|
|
8823 |
}
|
|
|
8824 |
my $line_num = "";
|
|
|
8825 |
my $pre = "";
|
|
|
8826 |
my $post = '</span> ';
|
|
|
8827 |
warn "undef rah_diff_R[$i]{type} " unless defined $rah_diff_R->[$i]{type};
|
|
|
8828 |
if ($rah_diff_R->[$i]{type} eq 'nonexist') {
|
|
|
8829 |
++$comment_line_number;
|
|
|
8830 |
$line_num = sprintf "\ <span class=\"clinenum\"> %0${n_digits}d %s",
|
|
|
8831 |
$comment_line_number, $post;
|
|
|
8832 |
$pre = '<span class="comment">';
|
|
|
8833 |
$html_out .= $line_num;
|
|
|
8834 |
$html_out .= $pre .
|
|
|
8835 |
html_metachars($rah_diff_L->[$i]{char}) .
|
|
|
8836 |
$post . "\n";
|
|
|
8837 |
next;
|
|
|
8838 |
}
|
|
|
8839 |
if ($rah_diff_R->[$i]{type} eq 'code' and
|
|
|
8840 |
$rah_diff_R->[$i]{desc} eq 'same') {
|
|
|
8841 |
# entire line remains as-is
|
|
|
8842 |
$line_num = sprintf "\ <span class=\"linenum\"> %0${n_digits}d %s",
|
|
|
8843 |
$rah_diff_R->[$i]{lnum}, $post;
|
|
|
8844 |
$pre = '<span class="normal">';
|
|
|
8845 |
$html_out .= $line_num;
|
|
|
8846 |
$html_out .= $pre .
|
|
|
8847 |
html_metachars($rah_diff_R->[$i]{char}) . $post;
|
|
|
8848 |
#XX } elsif ($rah_diff_R->[$i]{type} eq 'code') { # code+comments
|
|
|
8849 |
#XX
|
|
|
8850 |
#XX $line_num = '<span class="linenum">' .
|
|
|
8851 |
#XX $rah_diff_R->[$i]{lnum} . $post;
|
|
|
8852 |
#XX $html_out .= $line_num;
|
|
|
8853 |
#XX
|
|
|
8854 |
#XX my @strings = @{$rah_diff_R->[$i]{char}{strings}};
|
|
|
8855 |
#XX my @type = @{$rah_diff_R->[$i]{char}{type}};
|
|
|
8856 |
#XX for (my $i = 0; $i < scalar @strings; $i++) {
|
|
|
8857 |
#XX if ($type[$i] eq 'u') {
|
|
|
8858 |
#XX $pre = '<span class="normal">';
|
|
|
8859 |
#XX } else {
|
|
|
8860 |
#XX $pre = '<span class="comment">';
|
|
|
8861 |
#XX }
|
|
|
8862 |
#XX $html_out .= $pre . html_metachars($strings[$i]) . $post;
|
|
|
8863 |
#XX }
|
|
|
8864 |
# print Dumper(@strings, @type); die;
|
|
|
8865 |
|
|
|
8866 |
} elsif ($rah_diff_R->[$i]{type} eq 'comment') {
|
|
|
8867 |
$line_num = '<span class="clinenum">' . $comment_line_number . $post;
|
|
|
8868 |
# entire line is a comment
|
|
|
8869 |
$pre = '<span class="comment">';
|
|
|
8870 |
$html_out .= $pre .
|
|
|
8871 |
html_metachars($rah_diff_R->[$i]{char}) . $post;
|
|
|
8872 |
}
|
|
|
8873 |
#printf "%-30s %s %-30s\n", $line_1, $separator, $line_2;
|
|
|
8874 |
$html_out .= "\n";
|
|
|
8875 |
}
|
|
|
8876 |
|
|
|
8877 |
$html_out .= html_end();
|
|
|
8878 |
|
|
|
8879 |
my $out_file = "$filename.html";
|
|
|
8880 |
open OUT, ">$out_file" or die "Cannot write to $out_file $!\n";
|
|
|
8881 |
print OUT $html_out;
|
|
|
8882 |
close OUT;
|
|
|
8883 |
print "Wrote $out_file\n" unless $opt_quiet;
|
|
|
8884 |
$OUT->close;
|
|
|
8885 |
|
|
|
8886 |
print "<- write_comments_to_html\n" if $opt_v > 2;
|
|
|
8887 |
} # 1}}}
|
|
|
8888 |
sub array_diff { # {{{1
|
|
|
8889 |
my ($file , # in only used for error reporting
|
|
|
8890 |
$ra_lines_L , # in array of lines in Left file (no blank lines)
|
|
|
8891 |
$ra_lines_R , # in array of lines in Right file (no blank lines)
|
|
|
8892 |
$mode , # in "comment" | "revision"
|
|
|
8893 |
$rah_diff_L , # out
|
|
|
8894 |
$rah_diff_R , # out
|
|
|
8895 |
$raa_Errors , # in/out
|
|
|
8896 |
) = @_;
|
|
|
8897 |
|
|
|
8898 |
# This routine operates in two ways:
|
|
|
8899 |
# A. Computes diffs of the same file with and without comments.
|
|
|
8900 |
# This is used to classify lines as code, comments, or blank.
|
|
|
8901 |
# B. Computes diffs of two revisions of a file. This method
|
|
|
8902 |
# requires a prior run of method A using the older version
|
|
|
8903 |
# of the file because it needs lines to be classified.
|
|
|
8904 |
|
|
|
8905 |
# $rah_diff structure:
|
|
|
8906 |
# An array with n entries where n equals the number of lines in
|
|
|
8907 |
# an sdiff of the two files. Each entry in the array describes
|
|
|
8908 |
# the contents of the corresponding line in file Left and file Right:
|
|
|
8909 |
# diff[]{type} = blank | code | code+comment | comment | nonexist
|
|
|
8910 |
# {lnum} = line number within the original file (1-based)
|
|
|
8911 |
# {desc} = same | added | removed | modified
|
|
|
8912 |
# {char} = the input line unless {desc} = 'modified' in
|
|
|
8913 |
# which case
|
|
|
8914 |
# {char}{strings} = [ substrings ]
|
|
|
8915 |
# {char}{type} = [ disposition (added, removed, etc)]
|
|
|
8916 |
#
|
|
|
8917 |
|
|
|
8918 |
@{$rah_diff_L} = ();
|
|
|
8919 |
@{$rah_diff_R} = ();
|
|
|
8920 |
|
|
|
8921 |
print "-> array_diff()\n" if $opt_v > 2;
|
|
|
8922 |
my $COMMENT_MODE = 0;
|
|
|
8923 |
$COMMENT_MODE = 1 if $mode eq "comment";
|
|
|
8924 |
|
|
|
8925 |
#print "array_diff(mode=$mode)\n";
|
|
|
8926 |
#print Dumper("block left:" , $ra_lines_L);
|
|
|
8927 |
#print Dumper("block right:", $ra_lines_R);
|
|
|
8928 |
|
|
|
8929 |
my @sdiffs = ();
|
|
|
8930 |
eval {
|
|
|
8931 |
local $SIG{ALRM} = sub { die "alarm\n" };
|
|
|
8932 |
alarm $opt_diff_timeout;
|
|
|
8933 |
@sdiffs = sdiff($ra_lines_L, $ra_lines_R);
|
|
|
8934 |
alarm 0;
|
|
|
8935 |
};
|
|
|
8936 |
if ($@) {
|
|
|
8937 |
# timed out
|
|
|
8938 |
die unless $@ eq "alarm\n"; # propagate unexpected errors
|
|
|
8939 |
push @{$raa_Errors},
|
|
|
8940 |
[ $Error_Codes{'Diff error, exceeded timeout'}, $file ];
|
|
|
8941 |
if ($opt_v) {
|
|
|
8942 |
warn "array_diff: diff timeout failure for $file--ignoring\n";
|
|
|
8943 |
}
|
|
|
8944 |
return;
|
|
|
8945 |
}
|
|
|
8946 |
|
|
|
8947 |
#use Data::Dumper::Simple;
|
|
|
8948 |
#print Dumper($ra_lines_L, $ra_lines_R, @sdiffs);
|
|
|
8949 |
#die;
|
|
|
8950 |
|
|
|
8951 |
my $n_L = 0;
|
|
|
8952 |
my $n_R = 0;
|
|
|
8953 |
my $n_sdiff = 0; # index to $rah_diff_L, $rah_diff_R
|
|
|
8954 |
foreach my $triple (@sdiffs) {
|
|
|
8955 |
my $flag = $triple->[0];
|
|
|
8956 |
my $line_L = $triple->[1];
|
|
|
8957 |
my $line_R = $triple->[2];
|
|
|
8958 |
$rah_diff_L->[$n_sdiff]{char} = $line_L;
|
|
|
8959 |
$rah_diff_R->[$n_sdiff]{char} = $line_R;
|
|
|
8960 |
if ($flag eq 'u') { # u = unchanged
|
|
|
8961 |
++$n_L;
|
|
|
8962 |
++$n_R;
|
|
|
8963 |
if ($COMMENT_MODE) {
|
|
|
8964 |
# line exists in both with & without comments, must be code
|
|
|
8965 |
$rah_diff_L->[$n_sdiff]{type} = "code";
|
|
|
8966 |
$rah_diff_R->[$n_sdiff]{type} = "code";
|
|
|
8967 |
}
|
|
|
8968 |
$rah_diff_L->[$n_sdiff]{desc} = "same";
|
|
|
8969 |
$rah_diff_R->[$n_sdiff]{desc} = "same";
|
|
|
8970 |
$rah_diff_L->[$n_sdiff]{lnum} = $n_L;
|
|
|
8971 |
$rah_diff_R->[$n_sdiff]{lnum} = $n_R;
|
|
|
8972 |
} elsif ($flag eq 'c') { # c = changed
|
|
|
8973 |
# warn "per line sdiff() commented out\n"; if (0) {
|
|
|
8974 |
++$n_L;
|
|
|
8975 |
++$n_R;
|
|
|
8976 |
|
|
|
8977 |
if ($COMMENT_MODE) {
|
|
|
8978 |
# line has text both with & without comments;
|
|
|
8979 |
# count as code
|
|
|
8980 |
$rah_diff_L->[$n_sdiff]{type} = "code";
|
|
|
8981 |
$rah_diff_R->[$n_sdiff]{type} = "code";
|
|
|
8982 |
}
|
|
|
8983 |
|
|
|
8984 |
my @chars_L = split '', $line_L;
|
|
|
8985 |
my @chars_R = split '', $line_R;
|
|
|
8986 |
|
|
|
8987 |
#XX my @inline_sdiffs = sdiff( \@chars_L, \@chars_R );
|
|
|
8988 |
|
|
|
8989 |
#use Data::Dumper::Simple;
|
|
|
8990 |
#if ($n_R == 6 or $n_R == 1 or $n_R == 2) {
|
|
|
8991 |
#print "L=[$line_L]\n";
|
|
|
8992 |
#print "R=[$line_R]\n";
|
|
|
8993 |
#print Dumper(@chars_L, @chars_R, @inline_sdiffs);
|
|
|
8994 |
#}
|
|
|
8995 |
#XX my @index = ();
|
|
|
8996 |
#XX foreach my $il_triple (@inline_sdiffs) {
|
|
|
8997 |
#XX # make an array of u|c|+|- corresponding
|
|
|
8998 |
#XX # to each character
|
|
|
8999 |
#XX push @index, $il_triple->[0];
|
|
|
9000 |
#XX }
|
|
|
9001 |
#XX#print Dumper(@index); die;
|
|
|
9002 |
#XX # expect problems if arrays @index and $inline_sdiffs[1];
|
|
|
9003 |
#XX # (@{$inline_sdiffs->[1]} are the characters of line_L)
|
|
|
9004 |
#XX # aren't the same length
|
|
|
9005 |
#XX my $prev_type = $index[0];
|
|
|
9006 |
#XX my @strings = (); # blocks of consecutive code or comment
|
|
|
9007 |
#XX my @type = (); # u (=code) or c (=comment)
|
|
|
9008 |
#XX my $j_str = 0;
|
|
|
9009 |
#XX $strings[$j_str] .= $chars_L[0];
|
|
|
9010 |
#XX $type[$j_str] = $prev_type;
|
|
|
9011 |
#XX for (my $i = 1; $i < scalar @chars_L; $i++) {
|
|
|
9012 |
#XX if ($index[$i] ne $prev_type) {
|
|
|
9013 |
#XX ++$j_str;
|
|
|
9014 |
#XX#print "change at j_str=$j_str type=$index[$i]\n";
|
|
|
9015 |
#XX $type[$j_str] = $index[$i];
|
|
|
9016 |
#XX $prev_type = $index[$i];
|
|
|
9017 |
#XX }
|
|
|
9018 |
#XX $strings[$j_str] .= $chars_L[$i];
|
|
|
9019 |
#XX }
|
|
|
9020 |
# print Dumper(@strings, @type); die;
|
|
|
9021 |
#XX delete $rah_diff_R->[$n_sdiff]{char};
|
|
|
9022 |
#XX @{$rah_diff_R->[$n_sdiff]{char}{strings}} = @strings;
|
|
|
9023 |
#XX @{$rah_diff_R->[$n_sdiff]{char}{type}} = @type;
|
|
|
9024 |
$rah_diff_L->[$n_sdiff]{desc} = "modified";
|
|
|
9025 |
$rah_diff_R->[$n_sdiff]{desc} = "modified";
|
|
|
9026 |
$rah_diff_L->[$n_sdiff]{lnum} = $n_L;
|
|
|
9027 |
$rah_diff_R->[$n_sdiff]{lnum} = $n_R;
|
|
|
9028 |
#}
|
|
|
9029 |
|
|
|
9030 |
} elsif ($flag eq '+') { # + = added
|
|
|
9031 |
++$n_R;
|
|
|
9032 |
if ($COMMENT_MODE) {
|
|
|
9033 |
# should never get here
|
|
|
9034 |
@{$rah_diff_L} = ();
|
|
|
9035 |
@{$rah_diff_R} = ();
|
|
|
9036 |
push @{$raa_Errors},
|
|
|
9037 |
[ $Error_Codes{'Diff error (quoted comments?)'}, $file ];
|
|
|
9038 |
if ($opt_v) {
|
|
|
9039 |
warn "array_diff: diff failure (diff says the\n";
|
|
|
9040 |
warn "comment-free file has added lines).\n";
|
|
|
9041 |
warn "$n_sdiff $line_L\n";
|
|
|
9042 |
}
|
|
|
9043 |
last;
|
|
|
9044 |
}
|
|
|
9045 |
$rah_diff_L->[$n_sdiff]{type} = "nonexist";
|
|
|
9046 |
$rah_diff_L->[$n_sdiff]{desc} = "removed";
|
|
|
9047 |
$rah_diff_R->[$n_sdiff]{desc} = "added";
|
|
|
9048 |
$rah_diff_R->[$n_sdiff]{lnum} = $n_R;
|
|
|
9049 |
} elsif ($flag eq '-') { # - = removed
|
|
|
9050 |
++$n_L;
|
|
|
9051 |
if ($COMMENT_MODE) {
|
|
|
9052 |
# line must be comment because blanks already gone
|
|
|
9053 |
$rah_diff_L->[$n_sdiff]{type} = "comment";
|
|
|
9054 |
}
|
|
|
9055 |
$rah_diff_R->[$n_sdiff]{type} = "nonexist";
|
|
|
9056 |
$rah_diff_R->[$n_sdiff]{desc} = "removed";
|
|
|
9057 |
$rah_diff_L->[$n_sdiff]{desc} = "added";
|
|
|
9058 |
$rah_diff_L->[$n_sdiff]{lnum} = $n_L;
|
|
|
9059 |
}
|
|
|
9060 |
#printf "%-30s %s %-30s\n", $line_L, $separator, $line_R;
|
|
|
9061 |
++$n_sdiff;
|
|
|
9062 |
}
|
|
|
9063 |
#use Data::Dumper::Simple;
|
|
|
9064 |
#print Dumper($rah_diff_L, $rah_diff_R);
|
|
|
9065 |
|
|
|
9066 |
print "<- array_diff\n" if $opt_v > 2;
|
|
|
9067 |
} # 1}}}
|
|
|
9068 |
sub remove_leading_dir { # {{{1
|
|
|
9069 |
my @filenames = @_;
|
|
|
9070 |
#
|
|
|
9071 |
# Input should be a list of file names
|
|
|
9072 |
# with the same leading directory such as
|
|
|
9073 |
#
|
|
|
9074 |
# dir1/dir2/a.txt
|
|
|
9075 |
# dir1/dir2/b.txt
|
|
|
9076 |
# dir1/dir2/dir3/c.txt
|
|
|
9077 |
#
|
|
|
9078 |
# Output is the same list minus the common
|
|
|
9079 |
# directory path:
|
|
|
9080 |
#
|
|
|
9081 |
# a.txt
|
|
|
9082 |
# b.txt
|
|
|
9083 |
# dir3/c.txt
|
|
|
9084 |
#
|
|
|
9085 |
print "-> remove_leading_dir()\n" if $opt_v > 2;
|
|
|
9086 |
my @D = (); # a matrix: [ [ dir1, dir2 ], # dir1/dir2/a.txt
|
|
|
9087 |
# [ dir1, dir2 ], # dir1/dir2/b.txt
|
|
|
9088 |
# [ dir1, dir2 , dir3] ] # dir1/dir2/dir3/c.txt
|
|
|
9089 |
if ($ON_WINDOWS) {
|
|
|
9090 |
foreach my $F (@filenames) {
|
|
|
9091 |
$F =~ s{\\}{/}g;
|
|
|
9092 |
$F = ucfirst($F) if $F =~ /^\w:/; # uppercase drive letter
|
|
|
9093 |
}
|
|
|
9094 |
}
|
|
|
9095 |
if (scalar @filenames == 1) {
|
|
|
9096 |
# special case: with only one filename
|
|
|
9097 |
# cannot determine a baseline, just remove first directory level
|
|
|
9098 |
$filenames[0] =~ s{^.*?/}{};
|
|
|
9099 |
print "-> $filenames[0]\n";
|
|
|
9100 |
return $filenames[0];
|
|
|
9101 |
}
|
|
|
9102 |
foreach my $F (@filenames) {
|
|
|
9103 |
my ($Vol, $Dir, $File) = File::Spec->splitpath($F);
|
|
|
9104 |
my @x = File::Spec->splitdir( $Dir );
|
|
|
9105 |
pop @x unless $x[$#x]; # last entry usually null, remove it
|
|
|
9106 |
if ($ON_WINDOWS) {
|
|
|
9107 |
if (defined($Vol) and $Vol) {
|
|
|
9108 |
# put the drive letter, eg, C:, at the front
|
|
|
9109 |
unshift @x, uc $Vol;
|
|
|
9110 |
}
|
|
|
9111 |
}
|
|
|
9112 |
#print "F=$F, Dir=$Dir x=[", join("][", @x), "]\n";
|
|
|
9113 |
push @D, [ @x ];
|
|
|
9114 |
}
|
|
|
9115 |
|
|
|
9116 |
# now loop over columns until either they are all
|
|
|
9117 |
# eliminated or a unique column is found
|
|
|
9118 |
|
|
|
9119 |
#use Data::Dumper::Simple;
|
|
|
9120 |
#print Dumper("remove_leading_dir after ", @D);
|
|
|
9121 |
|
|
|
9122 |
my @common = (); # to contain the common leading directories
|
|
|
9123 |
my $mismatch = 0;
|
|
|
9124 |
while (!$mismatch) {
|
|
|
9125 |
for (my $row = 1; $row < scalar @D; $row++) {
|
|
|
9126 |
#print "comparing $D[$row][0] to $D[0][0]\n";
|
|
|
9127 |
|
|
|
9128 |
if (!defined $D[$row][0] or !defined $D[0][0] or
|
|
|
9129 |
($D[$row][0] ne $D[0][0])) {
|
|
|
9130 |
$mismatch = 1;
|
|
|
9131 |
last;
|
|
|
9132 |
}
|
|
|
9133 |
}
|
|
|
9134 |
#print "mismatch=$mismatch\n";
|
|
|
9135 |
if (!$mismatch) {
|
|
|
9136 |
push @common, $D[0][0];
|
|
|
9137 |
# all terms in the leading match; unshift the batch
|
|
|
9138 |
foreach my $ra (@D) {
|
|
|
9139 |
shift @{$ra};
|
|
|
9140 |
}
|
|
|
9141 |
}
|
|
|
9142 |
}
|
|
|
9143 |
|
|
|
9144 |
push @common, " "; # so that $leading will end with "/ "
|
|
|
9145 |
my $leading = File::Spec->catdir( @common );
|
|
|
9146 |
$leading =~ s{ $}{}; # now take back the bogus appended space
|
|
|
9147 |
#print "remove_leading_dir leading=[$leading]\n"; die;
|
|
|
9148 |
if ($ON_WINDOWS) {
|
|
|
9149 |
$leading =~ s{\\}{/}g;
|
|
|
9150 |
}
|
|
|
9151 |
foreach my $F (@filenames) {
|
|
|
9152 |
$F =~ s{^$leading}{};
|
|
|
9153 |
}
|
|
|
9154 |
|
|
|
9155 |
print "<- remove_leading_dir()\n" if $opt_v > 2;
|
|
|
9156 |
return @filenames;
|
|
|
9157 |
|
|
|
9158 |
} # 1}}}
|
|
|
9159 |
sub strip_leading_dir { # {{{1
|
|
|
9160 |
my ($leading, @filenames) = @_;
|
|
|
9161 |
# removes the string $leading from each entry in @filenames
|
|
|
9162 |
print "-> strip_leading_dir()\n" if $opt_v > 2;
|
|
|
9163 |
|
|
|
9164 |
#print "remove_leading_dir leading=[$leading]\n"; die;
|
|
|
9165 |
if ($ON_WINDOWS) {
|
|
|
9166 |
$leading =~ s{\\}{/}g;
|
|
|
9167 |
foreach my $F (@filenames) {
|
|
|
9168 |
$F =~ s{\\}{/}g;
|
|
|
9169 |
}
|
|
|
9170 |
}
|
|
|
9171 |
foreach my $F (@filenames) {
|
|
|
9172 |
$F =~ s{^$leading}{};
|
|
|
9173 |
}
|
|
|
9174 |
|
|
|
9175 |
print "<- strip_leading_dir()\n" if $opt_v > 2;
|
|
|
9176 |
return @filenames;
|
|
|
9177 |
|
|
|
9178 |
} # 1}}}
|
|
|
9179 |
sub find_deepest_file { # {{{1
|
|
|
9180 |
my @filenames = @_;
|
|
|
9181 |
#
|
|
|
9182 |
# Input should be a list of file names
|
|
|
9183 |
# with the same leading directory such as
|
|
|
9184 |
#
|
|
|
9185 |
# dir1/dir2/a.txt
|
|
|
9186 |
# dir1/dir2/b.txt
|
|
|
9187 |
# dir1/dir2/dir3/c.txt
|
|
|
9188 |
#
|
|
|
9189 |
# Output is the file with the most parent directories:
|
|
|
9190 |
#
|
|
|
9191 |
# dir1/dir2/dir3/c.txt
|
|
|
9192 |
|
|
|
9193 |
print "-> find_deepest_file()\n" if $opt_v > 2;
|
|
|
9194 |
|
|
|
9195 |
my $deepest = undef;
|
|
|
9196 |
my $max_subdir = -1;
|
|
|
9197 |
foreach my $F (sort @filenames) {
|
|
|
9198 |
my ($Vol, $Dir, $File) = File::Spec->splitpath($F);
|
|
|
9199 |
my @x = File::Spec->splitdir( $Dir );
|
|
|
9200 |
pop @x unless $x[$#x]; # last entry usually null, remove it
|
|
|
9201 |
if (scalar @x > $max_subdir) {
|
|
|
9202 |
$deepest = $F;
|
|
|
9203 |
$max_subdir = scalar @x;
|
|
|
9204 |
}
|
|
|
9205 |
}
|
|
|
9206 |
|
|
|
9207 |
print "<- find_deepest_file()\n" if $opt_v > 2;
|
|
|
9208 |
return $deepest;
|
|
|
9209 |
|
|
|
9210 |
} # 1}}}
|
|
|
9211 |
sub find_uncommon_parent_dir { # {{{1
|
|
|
9212 |
my ($file_L, $file_R) = @_;
|
|
|
9213 |
#
|
|
|
9214 |
# example:
|
|
|
9215 |
#
|
|
|
9216 |
# file_L = "perl-5.16.1/cpan/CPANPLUS/lib/CPANPLUS/Internals/Source/SQLite/Tie.pm"
|
|
|
9217 |
# file_R = "/tmp/8VxQG0OLbp/perl-5.16.3/cpan/CPANPLUS/lib/CPANPLUS/Internals/Source/SQLite/Tie.pm"
|
|
|
9218 |
#
|
|
|
9219 |
# then return
|
|
|
9220 |
#
|
|
|
9221 |
# "perl-5.16.1",
|
|
|
9222 |
# "/tmp/8VxQG0OLbp/perl-5.16.3",
|
|
|
9223 |
|
|
|
9224 |
my ($Vol_L, $Dir_L, $File_L) = File::Spec->splitpath($file_L);
|
|
|
9225 |
my @x_L = File::Spec->splitdir( $Dir_L );
|
|
|
9226 |
my ($Vol_R, $Dir_R, $File_R) = File::Spec->splitpath($file_R);
|
|
|
9227 |
my @x_R = File::Spec->splitdir( $Dir_R );
|
|
|
9228 |
|
|
|
9229 |
my @common = ();
|
|
|
9230 |
|
|
|
9231 |
# work backwards
|
|
|
9232 |
while ($x_L[$#x_L] eq $x_R[$#x_R]) {
|
|
|
9233 |
push @common, $x_L[$#x_L];
|
|
|
9234 |
pop @x_L;
|
|
|
9235 |
pop @x_R;
|
|
|
9236 |
}
|
|
|
9237 |
my $success = scalar @common;
|
|
|
9238 |
|
|
|
9239 |
my $dirs_L = File::Spec->catdir( @x_L );
|
|
|
9240 |
my $dirs_R = File::Spec->catdir( @x_R );
|
|
|
9241 |
my $lead_L = File::Spec->catpath( $Vol_L, $dirs_L, "" );
|
|
|
9242 |
my $lead_R = File::Spec->catpath( $Vol_R, $dirs_R, "" );
|
|
|
9243 |
|
|
|
9244 |
return $lead_L, $lead_R, $success;
|
|
|
9245 |
|
|
|
9246 |
} # 1}}}
|
|
|
9247 |
sub get_leading_dirs { # {{{1
|
|
|
9248 |
my ($rh_file_list_L, $rh_file_list_R) = @_;
|
|
|
9249 |
# find uniquely named files in both sets to help determine the
|
|
|
9250 |
# leading directory positions
|
|
|
9251 |
my %unique_filename = ();
|
|
|
9252 |
my %basename_L = ();
|
|
|
9253 |
my %basename_R = ();
|
|
|
9254 |
foreach my $f (keys %{$rh_file_list_L}) {
|
|
|
9255 |
my $bn = basename($f);
|
|
|
9256 |
$basename_L{ $bn }{'count'} += 1;
|
|
|
9257 |
$basename_L{ $bn }{'fullpath'} = $f;
|
|
|
9258 |
}
|
|
|
9259 |
foreach my $f (keys %{$rh_file_list_R}) {
|
|
|
9260 |
my $bn = basename($f);
|
|
|
9261 |
$basename_R{ $bn }{'count'} += 1;
|
|
|
9262 |
$basename_R{ $bn }{'fullpath'} = $f;
|
|
|
9263 |
}
|
|
|
9264 |
foreach my $f (keys %basename_L) {
|
|
|
9265 |
next unless $basename_L{$f}{'count'} == 1;
|
|
|
9266 |
next unless defined $basename_R{$f} and $basename_R{$f}{'count'} == 1;
|
|
|
9267 |
$unique_filename{$f}{'L'} = $basename_L{ $f }{'fullpath'};
|
|
|
9268 |
$unique_filename{$f}{'R'} = $basename_R{ $f }{'fullpath'};
|
|
|
9269 |
}
|
|
|
9270 |
return undef, undef, 0 unless %unique_filename;
|
|
|
9271 |
|
|
|
9272 |
my %candidate_leading_dir_L = ();
|
|
|
9273 |
my %candidate_leading_dir_R = ();
|
|
|
9274 |
foreach my $f (keys %unique_filename) {
|
|
|
9275 |
my $fL = $unique_filename{ $f }{'L'};
|
|
|
9276 |
my $fR = $unique_filename{ $f }{'R'};
|
|
|
9277 |
#printf "%-36s -> %-36s\n", $fL, $fR;
|
|
|
9278 |
my $ptr_L = length($fL) - 1;
|
|
|
9279 |
my $ptr_R = length($fR) - 1;
|
|
|
9280 |
my @aL = split '', $fL;
|
|
|
9281 |
my @aR = split '', $fR;
|
|
|
9282 |
while ($ptr_L >= 0 and $ptr_R >= 0) {
|
|
|
9283 |
last if $aL[$ptr_L] ne $aR[$ptr_R];
|
|
|
9284 |
--$ptr_L;
|
|
|
9285 |
--$ptr_R;
|
|
|
9286 |
}
|
|
|
9287 |
#print "ptr_L=$ptr_L ptr_R=$ptr_R\n";
|
|
|
9288 |
my $leading_dir_L = "";
|
|
|
9289 |
$leading_dir_L = substr($fL, 0, $ptr_L+1) if $ptr_L >= 0;
|
|
|
9290 |
my $leading_dir_R = "";
|
|
|
9291 |
$leading_dir_R = substr($fR, 0, $ptr_R+1) if $ptr_R >= 0;
|
|
|
9292 |
#print "leading_dir_L=$leading_dir_L leading_dir_R=$leading_dir_R\n";
|
|
|
9293 |
++$candidate_leading_dir_L{$leading_dir_L};
|
|
|
9294 |
++$candidate_leading_dir_R{$leading_dir_R};
|
|
|
9295 |
}
|
|
|
9296 |
#use Data::Dumper::Simple;
|
|
|
9297 |
#print Dumper(%candidate_leading_dir_L);
|
|
|
9298 |
#print Dumper(%candidate_leading_dir_R);
|
|
|
9299 |
#die;
|
|
|
9300 |
my $best_L = (sort {
|
|
|
9301 |
$candidate_leading_dir_L{$b} <=>
|
|
|
9302 |
$candidate_leading_dir_L{$a}} keys %candidate_leading_dir_L)[0];
|
|
|
9303 |
my $best_R = (sort {
|
|
|
9304 |
$candidate_leading_dir_R{$b} <=>
|
|
|
9305 |
$candidate_leading_dir_R{$a}} keys %candidate_leading_dir_R)[0];
|
|
|
9306 |
return $best_L, $best_R, 1;
|
|
|
9307 |
} # 1}}}
|
|
|
9308 |
sub align_by_pairs { # {{{1
|
|
|
9309 |
my ($rh_file_list_L , # in
|
|
|
9310 |
$rh_file_list_R , # in
|
|
|
9311 |
$ra_added , # out
|
|
|
9312 |
$ra_removed , # out
|
|
|
9313 |
$ra_compare_list , # out
|
|
|
9314 |
) = @_;
|
|
|
9315 |
print "-> align_by_pairs()\n" if $opt_v > 2;
|
|
|
9316 |
@{$ra_compare_list} = ();
|
|
|
9317 |
|
|
|
9318 |
my @files_L = sort keys %{$rh_file_list_L};
|
|
|
9319 |
my @files_R = sort keys %{$rh_file_list_R};
|
|
|
9320 |
return () unless @files_L or @files_R; # at least one must have stuff
|
|
|
9321 |
if ( @files_L and !@files_R) {
|
|
|
9322 |
# left side has stuff, right side is empty; everything deleted
|
|
|
9323 |
@{$ra_added } = ();
|
|
|
9324 |
@{$ra_removed } = @files_L;
|
|
|
9325 |
@{$ra_compare_list} = ();
|
|
|
9326 |
return;
|
|
|
9327 |
} elsif (!@files_L and @files_R) {
|
|
|
9328 |
# left side is empty, right side has stuff; everything added
|
|
|
9329 |
@{$ra_added } = @files_R;
|
|
|
9330 |
@{$ra_removed } = ();
|
|
|
9331 |
@{$ra_compare_list} = ();
|
|
|
9332 |
return;
|
|
|
9333 |
}
|
|
|
9334 |
#use Data::Dumper::Simple;
|
|
|
9335 |
#print Dumper("align_by_pairs", %{$rh_file_list_L}, %{$rh_file_list_R},);
|
|
|
9336 |
#die;
|
|
|
9337 |
if (scalar @files_L == 1 and scalar @files_R == 1) {
|
|
|
9338 |
# The easy case: compare two files.
|
|
|
9339 |
push @{$ra_compare_list}, [ $files_L[0], $files_R[0] ];
|
|
|
9340 |
@{$ra_added } = ();
|
|
|
9341 |
@{$ra_removed} = ();
|
|
|
9342 |
return;
|
|
|
9343 |
}
|
|
|
9344 |
# The harder case: compare groups of files. This only works
|
|
|
9345 |
# if the groups are in different directories so the first step
|
|
|
9346 |
# is to strip the leading directory names from file lists to
|
|
|
9347 |
# make it possible to align by file names.
|
|
|
9348 |
my @files_L_minus_dir = undef;
|
|
|
9349 |
my @files_R_minus_dir = undef;
|
|
|
9350 |
|
|
|
9351 |
my $deepest_file_L = find_deepest_file(@files_L);
|
|
|
9352 |
my $deepest_file_R = find_deepest_file(@files_R);
|
|
|
9353 |
#print "deepest L = [$deepest_file_L]\n";
|
|
|
9354 |
#print "deepest R = [$deepest_file_R]\n";
|
|
|
9355 |
####my ($leading_dir_L, $leading_dir_R, $success) =
|
|
|
9356 |
#### find_uncommon_parent_dir($deepest_file_L, $deepest_file_R);
|
|
|
9357 |
my ($leading_dir_L, $leading_dir_R, $success) =
|
|
|
9358 |
get_leading_dirs($rh_file_list_L, $rh_file_list_R);
|
|
|
9359 |
#print "leading_dir_L=[$leading_dir_L]\n";
|
|
|
9360 |
#print "leading_dir_R=[$leading_dir_R]\n";
|
|
|
9361 |
#print "success =[$success]\n";
|
|
|
9362 |
if ($success) {
|
|
|
9363 |
@files_L_minus_dir = strip_leading_dir($leading_dir_L, @files_L);
|
|
|
9364 |
@files_R_minus_dir = strip_leading_dir($leading_dir_R, @files_R);
|
|
|
9365 |
} else {
|
|
|
9366 |
# otherwise fall back to old strategy
|
|
|
9367 |
@files_L_minus_dir = remove_leading_dir(@files_L);
|
|
|
9368 |
@files_R_minus_dir = remove_leading_dir(@files_R);
|
|
|
9369 |
}
|
|
|
9370 |
|
|
|
9371 |
# Keys of the stripped_X arrays are canonical file names;
|
|
|
9372 |
# should overlap mostly. Keys in stripped_L but not in
|
|
|
9373 |
# stripped_R are files that have been deleted. Keys in
|
|
|
9374 |
# stripped_R but not in stripped_L have been added.
|
|
|
9375 |
my %stripped_L = ();
|
|
|
9376 |
@stripped_L{ @files_L_minus_dir } = @files_L;
|
|
|
9377 |
my %stripped_R = ();
|
|
|
9378 |
@stripped_R{ @files_R_minus_dir } = @files_R;
|
|
|
9379 |
|
|
|
9380 |
my %common = ();
|
|
|
9381 |
foreach my $f (keys %stripped_L) {
|
|
|
9382 |
$common{$f} = 1 if defined $stripped_R{$f};
|
|
|
9383 |
}
|
|
|
9384 |
|
|
|
9385 |
my %deleted = ();
|
|
|
9386 |
foreach my $f (keys %stripped_L) {
|
|
|
9387 |
$deleted{$stripped_L{$f}} = $f unless defined $stripped_R{$f};
|
|
|
9388 |
}
|
|
|
9389 |
|
|
|
9390 |
my %added = ();
|
|
|
9391 |
foreach my $f (keys %stripped_R) {
|
|
|
9392 |
$added{$stripped_R{$f}} = $f unless defined $stripped_L{$f};
|
|
|
9393 |
}
|
|
|
9394 |
|
|
|
9395 |
#use Data::Dumper::Simple;
|
|
|
9396 |
#print Dumper("align_by_pairs", %stripped_L, %stripped_R);
|
|
|
9397 |
#print Dumper("align_by_pairs", %common, %added, %deleted);
|
|
|
9398 |
|
|
|
9399 |
foreach my $f (keys %common) {
|
|
|
9400 |
push @{$ra_compare_list}, [ $stripped_L{$f},
|
|
|
9401 |
$stripped_R{$f} ];
|
|
|
9402 |
}
|
|
|
9403 |
@{$ra_added } = keys %added ;
|
|
|
9404 |
@{$ra_removed } = keys %deleted;
|
|
|
9405 |
|
|
|
9406 |
print "<- align_by_pairs()\n" if $opt_v > 2;
|
|
|
9407 |
return;
|
|
|
9408 |
#print Dumper("align_by_pairs", @files_L_minus_dir, @files_R_minus_dir);
|
|
|
9409 |
#die;
|
|
|
9410 |
} # 1}}}
|
|
|
9411 |
sub html_header { # {{{1
|
|
|
9412 |
my ($title , ) = @_;
|
|
|
9413 |
|
|
|
9414 |
print "-> html_header\n" if $opt_v > 2;
|
|
|
9415 |
return
|
|
|
9416 |
'<html>
|
|
|
9417 |
<head>
|
|
|
9418 |
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
|
|
9419 |
<meta name="GENERATOR" content="cloc http://cloc.sourceforge.net">
|
|
|
9420 |
' .
|
|
|
9421 |
"
|
|
|
9422 |
<!-- Created by $script v$VERSION -->
|
|
|
9423 |
<title>$title</title>
|
|
|
9424 |
" .
|
|
|
9425 |
'
|
|
|
9426 |
<style TYPE="text/css">
|
|
|
9427 |
<!--
|
|
|
9428 |
body {
|
|
|
9429 |
color: black;
|
|
|
9430 |
background-color: white;
|
|
|
9431 |
font-family: monospace
|
|
|
9432 |
}
|
|
|
9433 |
|
|
|
9434 |
.whitespace {
|
|
|
9435 |
background-color: gray;
|
|
|
9436 |
}
|
|
|
9437 |
|
|
|
9438 |
.comment {
|
|
|
9439 |
color: gray;
|
|
|
9440 |
font-style: italic;
|
|
|
9441 |
}
|
|
|
9442 |
|
|
|
9443 |
.clinenum {
|
|
|
9444 |
color: red;
|
|
|
9445 |
}
|
|
|
9446 |
|
|
|
9447 |
.linenum {
|
|
|
9448 |
color: green;
|
|
|
9449 |
}
|
|
|
9450 |
-->
|
|
|
9451 |
</style>
|
|
|
9452 |
</head>
|
|
|
9453 |
<body>
|
|
|
9454 |
<pre><tt>
|
|
|
9455 |
';
|
|
|
9456 |
print "<- html_header\n" if $opt_v > 2;
|
|
|
9457 |
} # 1}}}
|
|
|
9458 |
sub html_end { # {{{1
|
|
|
9459 |
return
|
|
|
9460 |
'</tt></pre>
|
|
|
9461 |
</body>
|
|
|
9462 |
</html>
|
|
|
9463 |
';
|
|
|
9464 |
} # 1}}}
|
|
|
9465 |
sub die_unknown_lang { # {{{1
|
|
|
9466 |
my ($lang, $option_name) = @_;
|
|
|
9467 |
die "Unknown language '$lang' used with $option_name option. " .
|
|
|
9468 |
"The command\n $script --show-lang\n" .
|
|
|
9469 |
"will print all recognized languages. Language names are " .
|
|
|
9470 |
"case sensitive.\n" ;
|
|
|
9471 |
} # 1}}}
|
|
|
9472 |
sub unicode_file { # {{{1
|
|
|
9473 |
my $file = shift @_;
|
|
|
9474 |
|
|
|
9475 |
print "-> unicode_file($file)\n" if $opt_v > 2;
|
|
|
9476 |
return 0 if (-s $file > 2_000_000);
|
|
|
9477 |
# don't bother trying to test binary files bigger than 2 MB
|
|
|
9478 |
|
|
|
9479 |
my $IN = new IO::File $file, "r";
|
|
|
9480 |
if (!defined $IN) {
|
|
|
9481 |
warn "Unable to read $file; ignoring.\n";
|
|
|
9482 |
return 0;
|
|
|
9483 |
}
|
|
|
9484 |
my @lines = <$IN>;
|
|
|
9485 |
$IN->close;
|
|
|
9486 |
|
|
|
9487 |
if (unicode_to_ascii( join('', @lines) )) {
|
|
|
9488 |
print "<- unicode_file()\n" if $opt_v > 2;
|
|
|
9489 |
return 1;
|
|
|
9490 |
} else {
|
|
|
9491 |
print "<- unicode_file()\n" if $opt_v > 2;
|
|
|
9492 |
return 0;
|
|
|
9493 |
}
|
|
|
9494 |
|
|
|
9495 |
} # 1}}}
|
|
|
9496 |
sub unicode_to_ascii { # {{{1
|
|
|
9497 |
my $string = shift @_;
|
|
|
9498 |
|
|
|
9499 |
# A trivial attempt to convert UTF-16 little or big endian
|
|
|
9500 |
# files into ASCII. These files exhibit the following byte
|
|
|
9501 |
# sequence:
|
|
|
9502 |
# byte 1: 255
|
|
|
9503 |
# byte 2: 254
|
|
|
9504 |
# byte 3: ord of ASCII character
|
|
|
9505 |
# byte 4: 0
|
|
|
9506 |
# byte 3+i: ord of ASCII character
|
|
|
9507 |
# byte 4+i: 0
|
|
|
9508 |
# or
|
|
|
9509 |
# byte 1: 255
|
|
|
9510 |
# byte 2: 254
|
|
|
9511 |
# byte 3: 0
|
|
|
9512 |
# byte 4: ord of ASCII character
|
|
|
9513 |
# byte 3+i: 0
|
|
|
9514 |
# byte 4+i: ord of ASCII character
|
|
|
9515 |
|
|
|
9516 |
my $length = length $string;
|
|
|
9517 |
#print "length=$length\n";
|
|
|
9518 |
return '' if $length <= 3;
|
|
|
9519 |
my @unicode = split(//, $string);
|
|
|
9520 |
|
|
|
9521 |
# check the first 100 characters for big or little endian UTF-16 encoding
|
|
|
9522 |
my $max_peek = $length < 200 ? $length : 200;
|
|
|
9523 |
my @view_1 = ();
|
|
|
9524 |
for (my $i = 2; $i < $max_peek; $i += 2) { push @view_1, $unicode[$i] }
|
|
|
9525 |
my @view_2 = ();
|
|
|
9526 |
for (my $i = 3; $i < $max_peek; $i += 2) { push @view_2, $unicode[$i] }
|
|
|
9527 |
|
|
|
9528 |
my $points_1 = 0;
|
|
|
9529 |
foreach my $C (@view_1) {
|
|
|
9530 |
++$points_1 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13
|
|
|
9531 |
or ord($C) == 10
|
|
|
9532 |
or ord($C) == 9;
|
|
|
9533 |
}
|
|
|
9534 |
|
|
|
9535 |
my $points_2 = 0;
|
|
|
9536 |
foreach my $C (@view_2) {
|
|
|
9537 |
++$points_2 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13
|
|
|
9538 |
or ord($C) == 10
|
|
|
9539 |
or ord($C) == 9;
|
|
|
9540 |
}
|
|
|
9541 |
#print "points 1: $points_1\n";
|
|
|
9542 |
#print "points 2: $points_2\n";
|
|
|
9543 |
|
|
|
9544 |
my $offset = undef;
|
|
|
9545 |
if ($points_1 > 90) { $offset = 2; }
|
|
|
9546 |
elsif ($points_2 > 90) { $offset = 3; }
|
|
|
9547 |
else { return '' } # neither big or little endian UTF-16
|
|
|
9548 |
|
|
|
9549 |
my @ascii = ();
|
|
|
9550 |
for (my $i = $offset; $i < $length; $i += 2) { push @ascii, $unicode[$i]; }
|
|
|
9551 |
return join("", @ascii);
|
|
|
9552 |
} # 1}}}
|
|
|
9553 |
sub uncompress_archive_cmd { # {{{1
|
|
|
9554 |
my ($archive_file, ) = @_;
|
|
|
9555 |
|
|
|
9556 |
# Wrap $archive_file in single or double quotes in the system
|
|
|
9557 |
# commands below to avoid filename chicanery (including
|
|
|
9558 |
# spaces in the names).
|
|
|
9559 |
|
|
|
9560 |
print "-> uncompress_archive_cmd($archive_file)\n" if $opt_v > 2;
|
|
|
9561 |
my $extract_cmd = "";
|
|
|
9562 |
my $missing = "";
|
|
|
9563 |
if ($opt_extract_with) {
|
|
|
9564 |
( $extract_cmd = $opt_extract_with ) =~ s/>FILE</$archive_file/g;
|
|
|
9565 |
} elsif (basename($archive_file) eq "-" and !$ON_WINDOWS) {
|
|
|
9566 |
$extract_cmd = "cat > -";
|
|
|
9567 |
} elsif (($archive_file =~ /\.tar\.(gz|Z)$/ or
|
|
|
9568 |
$archive_file =~ /\.tgz$/ ) and !$ON_WINDOWS) {
|
|
|
9569 |
if (external_utility_exists("gzip --version")) {
|
|
|
9570 |
if (external_utility_exists("tar --version")) {
|
|
|
9571 |
$extract_cmd = "gzip -dc '$archive_file' | tar xf -";
|
|
|
9572 |
} else {
|
|
|
9573 |
$missing = "tar";
|
|
|
9574 |
}
|
|
|
9575 |
} else {
|
|
|
9576 |
$missing = "gzip";
|
|
|
9577 |
}
|
|
|
9578 |
} elsif ($archive_file =~ /\.tar\.bz2$/ and !$ON_WINDOWS) {
|
|
|
9579 |
if (external_utility_exists("bzip2 --help")) {
|
|
|
9580 |
if (external_utility_exists("tar --version")) {
|
|
|
9581 |
$extract_cmd = "bzip2 -dc '$archive_file' | tar xf -";
|
|
|
9582 |
} else {
|
|
|
9583 |
$missing = "tar";
|
|
|
9584 |
}
|
|
|
9585 |
} else {
|
|
|
9586 |
$missing = "bzip2";
|
|
|
9587 |
}
|
|
|
9588 |
} elsif ($archive_file =~ /\.tar\.xz$/ and !$ON_WINDOWS) {
|
|
|
9589 |
if (external_utility_exists("unxz --version")) {
|
|
|
9590 |
if (external_utility_exists("tar --version")) {
|
|
|
9591 |
$extract_cmd = "unxz -dc '$archive_file' | tar xf -";
|
|
|
9592 |
} else {
|
|
|
9593 |
$missing = "tar";
|
|
|
9594 |
}
|
|
|
9595 |
} else {
|
|
|
9596 |
$missing = "bzip2";
|
|
|
9597 |
}
|
|
|
9598 |
} elsif ($archive_file =~ /\.tar$/ and !$ON_WINDOWS) {
|
|
|
9599 |
$extract_cmd = "tar xf '$archive_file'";
|
|
|
9600 |
} elsif ($archive_file =~ /\.src\.rpm$/i and !$ON_WINDOWS) {
|
|
|
9601 |
if (external_utility_exists("cpio --version")) {
|
|
|
9602 |
if (external_utility_exists("rpm2cpio")) {
|
|
|
9603 |
$extract_cmd = "rpm2cpio '$archive_file' | cpio -i";
|
|
|
9604 |
} else {
|
|
|
9605 |
$missing = "rpm2cpio";
|
|
|
9606 |
}
|
|
|
9607 |
} else {
|
|
|
9608 |
$missing = "bzip2";
|
|
|
9609 |
}
|
|
|
9610 |
} elsif ($archive_file =~ /\.zip$/i and !$ON_WINDOWS) {
|
|
|
9611 |
if (external_utility_exists("unzip")) {
|
|
|
9612 |
$extract_cmd = "unzip -qq -d . '$archive_file'";
|
|
|
9613 |
} else {
|
|
|
9614 |
$missing = "unzip";
|
|
|
9615 |
}
|
|
|
9616 |
} elsif ($ON_WINDOWS and $archive_file =~ /\.zip$/i) {
|
|
|
9617 |
# zip on Windows, guess default Winzip install location
|
|
|
9618 |
$extract_cmd = "";
|
|
|
9619 |
my $WinZip = '"C:\\Program Files\\WinZip\\WinZip32.exe"';
|
|
|
9620 |
if (external_utility_exists($WinZip)) {
|
|
|
9621 |
$extract_cmd = "$WinZip -e -o \"$archive_file\" .";
|
|
|
9622 |
#print "trace 5 extract_cmd=[$extract_cmd]\n";
|
|
|
9623 |
} else {
|
|
|
9624 |
#print "trace 6\n";
|
|
|
9625 |
$missing = $WinZip;
|
|
|
9626 |
}
|
|
|
9627 |
}
|
|
|
9628 |
print "<- uncompress_archive_cmd\n" if $opt_v > 2;
|
|
|
9629 |
if ($missing) {
|
|
|
9630 |
die "Unable to expand $archive_file because external\n",
|
|
|
9631 |
"utility '$missing' is not available.\n",
|
|
|
9632 |
"Another possibility is to use the --extract-with option.\n";
|
|
|
9633 |
} else {
|
|
|
9634 |
return $extract_cmd;
|
|
|
9635 |
}
|
|
|
9636 |
}
|
|
|
9637 |
# 1}}}
|
|
|
9638 |
sub read_list_file { # {{{1
|
|
|
9639 |
my ($file, ) = @_;
|
|
|
9640 |
|
|
|
9641 |
print "-> read_list_file($file)\n" if $opt_v > 2;
|
|
|
9642 |
my $IN = new IO::File $file, "r";
|
|
|
9643 |
if (!defined $IN) {
|
|
|
9644 |
warn "Unable to read $file; ignoring.\n";
|
|
|
9645 |
next;
|
|
|
9646 |
}
|
|
|
9647 |
my @entry = ();
|
|
|
9648 |
while (<$IN>) {
|
|
|
9649 |
next if /^\s*$/ or /^\s*#/; # skip empty or commented lines
|
|
|
9650 |
s/\cM$//; # DOS to Unix
|
|
|
9651 |
chomp;
|
|
|
9652 |
push @entry, $_;
|
|
|
9653 |
}
|
|
|
9654 |
$IN->close;
|
|
|
9655 |
|
|
|
9656 |
print "<- read_list_file\n" if $opt_v > 2;
|
|
|
9657 |
return @entry;
|
|
|
9658 |
}
|
|
|
9659 |
# 1}}}
|
|
|
9660 |
sub external_utility_exists { # {{{1
|
|
|
9661 |
my $exe = shift @_;
|
|
|
9662 |
|
|
|
9663 |
my $success = 0;
|
|
|
9664 |
if ($ON_WINDOWS) {
|
|
|
9665 |
$success = 1 unless system $exe . ' > nul';
|
|
|
9666 |
} else {
|
|
|
9667 |
$success = 1 unless system $exe . ' >/dev/null 2>&1';
|
|
|
9668 |
if (!$success) {
|
|
|
9669 |
$success = 1 unless system "which" . " $exe" . ' >/dev/null 2>&1';
|
|
|
9670 |
}
|
|
|
9671 |
}
|
|
|
9672 |
|
|
|
9673 |
return $success;
|
|
|
9674 |
} # 1}}}
|
|
|
9675 |
sub write_xsl_file { # {{{1
|
|
|
9676 |
my $OUT = new IO::File $CLOC_XSL, "w";
|
|
|
9677 |
if (!defined $OUT) {
|
|
|
9678 |
warn "Unable to write $CLOC_XSL $!\n";
|
|
|
9679 |
return;
|
|
|
9680 |
}
|
|
|
9681 |
my $XSL = # <style> </style> {{{2
|
|
|
9682 |
'<?xml version="1.0" encoding="US-ASCII"?>
|
|
|
9683 |
<!-- XLS file by Paul Schwann, January 2009.
|
|
|
9684 |
Fixes for by-file and by-file-by-lang by d_uragan, November 2010.
|
|
|
9685 |
-->
|
|
|
9686 |
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
|
|
9687 |
<xsl:output method="html"/>
|
|
|
9688 |
<xsl:template match="/">
|
|
|
9689 |
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
|
9690 |
<head>
|
|
|
9691 |
<title>CLOC Results</title>
|
|
|
9692 |
</head>
|
|
|
9693 |
<style type="text/css">
|
|
|
9694 |
table {
|
|
|
9695 |
table-layout: auto;
|
|
|
9696 |
border-collapse: collapse;
|
|
|
9697 |
empty-cells: show;
|
|
|
9698 |
}
|
|
|
9699 |
td, th {
|
|
|
9700 |
padding: 4px;
|
|
|
9701 |
}
|
|
|
9702 |
th {
|
|
|
9703 |
background-color: #CCCCCC;
|
|
|
9704 |
}
|
|
|
9705 |
td {
|
|
|
9706 |
text-align: center;
|
|
|
9707 |
}
|
|
|
9708 |
table, td, tr, th {
|
|
|
9709 |
border: thin solid #999999;
|
|
|
9710 |
}
|
|
|
9711 |
</style>
|
|
|
9712 |
<body>
|
|
|
9713 |
<h3><xsl:value-of select="results/header"/></h3>
|
|
|
9714 |
';
|
|
|
9715 |
# 2}}}
|
|
|
9716 |
|
|
|
9717 |
if ($opt_by_file) {
|
|
|
9718 |
$XSL .= # <table> </table>{{{2
|
|
|
9719 |
' <table>
|
|
|
9720 |
<thead>
|
|
|
9721 |
<tr>
|
|
|
9722 |
<th>File</th>
|
|
|
9723 |
<th>Blank</th>
|
|
|
9724 |
<th>Comment</th>
|
|
|
9725 |
<th>Code</th>
|
|
|
9726 |
<th>Language</th>
|
|
|
9727 |
';
|
|
|
9728 |
$XSL .=
|
|
|
9729 |
' <th>3<sup>rd</sup> Generation Equivalent</th>
|
|
|
9730 |
<th>Scale</th>
|
|
|
9731 |
' if $opt_3;
|
|
|
9732 |
$XSL .=
|
|
|
9733 |
' </tr>
|
|
|
9734 |
</thead>
|
|
|
9735 |
<tbody>
|
|
|
9736 |
<xsl:for-each select="results/files/file">
|
|
|
9737 |
<tr>
|
|
|
9738 |
<th><xsl:value-of select="@name"/></th>
|
|
|
9739 |
<td><xsl:value-of select="@blank"/></td>
|
|
|
9740 |
<td><xsl:value-of select="@comment"/></td>
|
|
|
9741 |
<td><xsl:value-of select="@code"/></td>
|
|
|
9742 |
<td><xsl:value-of select="@language"/></td>
|
|
|
9743 |
';
|
|
|
9744 |
$XSL .=
|
|
|
9745 |
' <td><xsl:value-of select="@factor"/></td>
|
|
|
9746 |
<td><xsl:value-of select="@scaled"/></td>
|
|
|
9747 |
' if $opt_3;
|
|
|
9748 |
$XSL .=
|
|
|
9749 |
' </tr>
|
|
|
9750 |
</xsl:for-each>
|
|
|
9751 |
<tr>
|
|
|
9752 |
<th>Total</th>
|
|
|
9753 |
<th><xsl:value-of select="results/files/total/@blank"/></th>
|
|
|
9754 |
<th><xsl:value-of select="results/files/total/@comment"/></th>
|
|
|
9755 |
<th><xsl:value-of select="results/files/total/@code"/></th>
|
|
|
9756 |
<th><xsl:value-of select="results/files/total/@language"/></th>
|
|
|
9757 |
';
|
|
|
9758 |
$XSL .=
|
|
|
9759 |
' <th><xsl:value-of select="results/files/total/@factor"/></th>
|
|
|
9760 |
<th><xsl:value-of select="results/files/total/@scaled"/></th>
|
|
|
9761 |
' if $opt_3;
|
|
|
9762 |
$XSL .=
|
|
|
9763 |
' </tr>
|
|
|
9764 |
</tbody>
|
|
|
9765 |
</table>
|
|
|
9766 |
<br/>
|
|
|
9767 |
';
|
|
|
9768 |
# 2}}}
|
|
|
9769 |
}
|
|
|
9770 |
|
|
|
9771 |
if (!$opt_by_file or $opt_by_file_by_lang) {
|
|
|
9772 |
$XSL .= # <table> </table> {{{2
|
|
|
9773 |
' <table>
|
|
|
9774 |
<thead>
|
|
|
9775 |
<tr>
|
|
|
9776 |
<th>Language</th>
|
|
|
9777 |
<th>Files</th>
|
|
|
9778 |
<th>Blank</th>
|
|
|
9779 |
<th>Comment</th>
|
|
|
9780 |
<th>Code</th>
|
|
|
9781 |
';
|
|
|
9782 |
$XSL .=
|
|
|
9783 |
' <th>Scale</th>
|
|
|
9784 |
<th>3<sup>rd</sup> Generation Equivalent</th>
|
|
|
9785 |
' if $opt_3;
|
|
|
9786 |
$XSL .=
|
|
|
9787 |
' </tr>
|
|
|
9788 |
</thead>
|
|
|
9789 |
<tbody>
|
|
|
9790 |
<xsl:for-each select="results/languages/language">
|
|
|
9791 |
<tr>
|
|
|
9792 |
<th><xsl:value-of select="@name"/></th>
|
|
|
9793 |
<td><xsl:value-of select="@files_count"/></td>
|
|
|
9794 |
<td><xsl:value-of select="@blank"/></td>
|
|
|
9795 |
<td><xsl:value-of select="@comment"/></td>
|
|
|
9796 |
<td><xsl:value-of select="@code"/></td>
|
|
|
9797 |
';
|
|
|
9798 |
$XSL .=
|
|
|
9799 |
' <td><xsl:value-of select="@factor"/></td>
|
|
|
9800 |
<td><xsl:value-of select="@scaled"/></td>
|
|
|
9801 |
' if $opt_3;
|
|
|
9802 |
$XSL .=
|
|
|
9803 |
' </tr>
|
|
|
9804 |
</xsl:for-each>
|
|
|
9805 |
<tr>
|
|
|
9806 |
<th>Total</th>
|
|
|
9807 |
<th><xsl:value-of select="results/languages/total/@sum_files"/></th>
|
|
|
9808 |
<th><xsl:value-of select="results/languages/total/@blank"/></th>
|
|
|
9809 |
<th><xsl:value-of select="results/languages/total/@comment"/></th>
|
|
|
9810 |
<th><xsl:value-of select="results/languages/total/@code"/></th>
|
|
|
9811 |
';
|
|
|
9812 |
$XSL .=
|
|
|
9813 |
' <th><xsl:value-of select="results/languages/total/@factor"/></th>
|
|
|
9814 |
<th><xsl:value-of select="results/languages/total/@scaled"/></th>
|
|
|
9815 |
' if $opt_3;
|
|
|
9816 |
$XSL .=
|
|
|
9817 |
' </tr>
|
|
|
9818 |
</tbody>
|
|
|
9819 |
</table>
|
|
|
9820 |
';
|
|
|
9821 |
# 2}}}
|
|
|
9822 |
}
|
|
|
9823 |
|
|
|
9824 |
$XSL.= <<'EO_XSL'; # {{{2
|
|
|
9825 |
</body>
|
|
|
9826 |
</html>
|
|
|
9827 |
</xsl:template>
|
|
|
9828 |
</xsl:stylesheet>
|
|
|
9829 |
|
|
|
9830 |
EO_XSL
|
|
|
9831 |
# 2}}}
|
|
|
9832 |
|
|
|
9833 |
my $XSL_DIFF = <<'EO_DIFF_XSL'; # {{{2
|
|
|
9834 |
<?xml version="1.0" encoding="US-ASCII"?>
|
|
|
9835 |
<!-- XLS file by Blazej Kroll, November 2010 -->
|
|
|
9836 |
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
|
|
9837 |
<xsl:output method="html"/>
|
|
|
9838 |
<xsl:template match="/">
|
|
|
9839 |
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
|
9840 |
<head>
|
|
|
9841 |
<title>CLOC Results</title>
|
|
|
9842 |
</head>
|
|
|
9843 |
<style type="text/css">
|
|
|
9844 |
table {
|
|
|
9845 |
table-layout: auto;
|
|
|
9846 |
border-collapse: collapse;
|
|
|
9847 |
empty-cells: show;
|
|
|
9848 |
margin: 1em;
|
|
|
9849 |
}
|
|
|
9850 |
td, th {
|
|
|
9851 |
padding: 4px;
|
|
|
9852 |
}
|
|
|
9853 |
th {
|
|
|
9854 |
background-color: #CCCCCC;
|
|
|
9855 |
}
|
|
|
9856 |
td {
|
|
|
9857 |
text-align: center;
|
|
|
9858 |
}
|
|
|
9859 |
table, td, tr, th {
|
|
|
9860 |
border: thin solid #999999;
|
|
|
9861 |
}
|
|
|
9862 |
</style>
|
|
|
9863 |
<body>
|
|
|
9864 |
<h3><xsl:value-of select="results/header"/></h3>
|
|
|
9865 |
EO_DIFF_XSL
|
|
|
9866 |
# 2}}}
|
|
|
9867 |
|
|
|
9868 |
if ($opt_by_file) {
|
|
|
9869 |
$XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
|
|
|
9870 |
<table>
|
|
|
9871 |
<thead>
|
|
|
9872 |
<tr><th colspan="4">Same</th>
|
|
|
9873 |
</tr>
|
|
|
9874 |
<tr>
|
|
|
9875 |
<th>File</th>
|
|
|
9876 |
<th>Blank</th>
|
|
|
9877 |
<th>Comment</th>
|
|
|
9878 |
<th>Code</th>
|
|
|
9879 |
</tr>
|
|
|
9880 |
</thead>
|
|
|
9881 |
<tbody>
|
|
|
9882 |
<xsl:for-each select="diff_results/same/file">
|
|
|
9883 |
<tr>
|
|
|
9884 |
<th><xsl:value-of select="@name"/></th>
|
|
|
9885 |
<td><xsl:value-of select="@blank"/></td>
|
|
|
9886 |
<td><xsl:value-of select="@comment"/></td>
|
|
|
9887 |
<td><xsl:value-of select="@code"/></td>
|
|
|
9888 |
</tr>
|
|
|
9889 |
</xsl:for-each>
|
|
|
9890 |
</tbody>
|
|
|
9891 |
</table>
|
|
|
9892 |
|
|
|
9893 |
<table>
|
|
|
9894 |
<thead>
|
|
|
9895 |
<tr><th colspan="4">Modified</th>
|
|
|
9896 |
</tr>
|
|
|
9897 |
<tr>
|
|
|
9898 |
<th>File</th>
|
|
|
9899 |
<th>Blank</th>
|
|
|
9900 |
<th>Comment</th>
|
|
|
9901 |
<th>Code</th>
|
|
|
9902 |
</tr>
|
|
|
9903 |
</thead>
|
|
|
9904 |
<tbody>
|
|
|
9905 |
<xsl:for-each select="diff_results/modified/file">
|
|
|
9906 |
<tr>
|
|
|
9907 |
<th><xsl:value-of select="@name"/></th>
|
|
|
9908 |
<td><xsl:value-of select="@blank"/></td>
|
|
|
9909 |
<td><xsl:value-of select="@comment"/></td>
|
|
|
9910 |
<td><xsl:value-of select="@code"/></td>
|
|
|
9911 |
</tr>
|
|
|
9912 |
</xsl:for-each>
|
|
|
9913 |
</tbody>
|
|
|
9914 |
</table>
|
|
|
9915 |
|
|
|
9916 |
<table>
|
|
|
9917 |
<thead>
|
|
|
9918 |
<tr><th colspan="4">Added</th>
|
|
|
9919 |
</tr>
|
|
|
9920 |
<tr>
|
|
|
9921 |
<th>File</th>
|
|
|
9922 |
<th>Blank</th>
|
|
|
9923 |
<th>Comment</th>
|
|
|
9924 |
<th>Code</th>
|
|
|
9925 |
</tr>
|
|
|
9926 |
</thead>
|
|
|
9927 |
<tbody>
|
|
|
9928 |
<xsl:for-each select="diff_results/added/file">
|
|
|
9929 |
<tr>
|
|
|
9930 |
<th><xsl:value-of select="@name"/></th>
|
|
|
9931 |
<td><xsl:value-of select="@blank"/></td>
|
|
|
9932 |
<td><xsl:value-of select="@comment"/></td>
|
|
|
9933 |
<td><xsl:value-of select="@code"/></td>
|
|
|
9934 |
</tr>
|
|
|
9935 |
</xsl:for-each>
|
|
|
9936 |
</tbody>
|
|
|
9937 |
</table>
|
|
|
9938 |
|
|
|
9939 |
<table>
|
|
|
9940 |
<thead>
|
|
|
9941 |
<tr><th colspan="4">Removed</th>
|
|
|
9942 |
</tr>
|
|
|
9943 |
<tr>
|
|
|
9944 |
<th>File</th>
|
|
|
9945 |
<th>Blank</th>
|
|
|
9946 |
<th>Comment</th>
|
|
|
9947 |
<th>Code</th>
|
|
|
9948 |
</tr>
|
|
|
9949 |
</thead>
|
|
|
9950 |
<tbody>
|
|
|
9951 |
<xsl:for-each select="diff_results/removed/file">
|
|
|
9952 |
<tr>
|
|
|
9953 |
<th><xsl:value-of select="@name"/></th>
|
|
|
9954 |
<td><xsl:value-of select="@blank"/></td>
|
|
|
9955 |
<td><xsl:value-of select="@comment"/></td>
|
|
|
9956 |
<td><xsl:value-of select="@code"/></td>
|
|
|
9957 |
</tr>
|
|
|
9958 |
</xsl:for-each>
|
|
|
9959 |
</tbody>
|
|
|
9960 |
</table>
|
|
|
9961 |
EO_DIFF_XSL
|
|
|
9962 |
# 2}}}
|
|
|
9963 |
}
|
|
|
9964 |
|
|
|
9965 |
if (!$opt_by_file or $opt_by_file_by_lang) {
|
|
|
9966 |
$XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
|
|
|
9967 |
<table>
|
|
|
9968 |
<thead>
|
|
|
9969 |
<tr><th colspan="5">Same</th>
|
|
|
9970 |
</tr>
|
|
|
9971 |
<tr>
|
|
|
9972 |
<th>Language</th>
|
|
|
9973 |
<th>Files</th>
|
|
|
9974 |
<th>Blank</th>
|
|
|
9975 |
<th>Comment</th>
|
|
|
9976 |
<th>Code</th>
|
|
|
9977 |
</tr>
|
|
|
9978 |
</thead>
|
|
|
9979 |
<tbody>
|
|
|
9980 |
<xsl:for-each select="diff_results/same/language">
|
|
|
9981 |
<tr>
|
|
|
9982 |
<th><xsl:value-of select="@name"/></th>
|
|
|
9983 |
<td><xsl:value-of select="@files_count"/></td>
|
|
|
9984 |
<td><xsl:value-of select="@blank"/></td>
|
|
|
9985 |
<td><xsl:value-of select="@comment"/></td>
|
|
|
9986 |
<td><xsl:value-of select="@code"/></td>
|
|
|
9987 |
</tr>
|
|
|
9988 |
</xsl:for-each>
|
|
|
9989 |
</tbody>
|
|
|
9990 |
</table>
|
|
|
9991 |
|
|
|
9992 |
<table>
|
|
|
9993 |
<thead>
|
|
|
9994 |
<tr><th colspan="5">Modified</th>
|
|
|
9995 |
</tr>
|
|
|
9996 |
<tr>
|
|
|
9997 |
<th>Language</th>
|
|
|
9998 |
<th>Files</th>
|
|
|
9999 |
<th>Blank</th>
|
|
|
10000 |
<th>Comment</th>
|
|
|
10001 |
<th>Code</th>
|
|
|
10002 |
</tr>
|
|
|
10003 |
</thead>
|
|
|
10004 |
<tbody>
|
|
|
10005 |
<xsl:for-each select="diff_results/modified/language">
|
|
|
10006 |
<tr>
|
|
|
10007 |
<th><xsl:value-of select="@name"/></th>
|
|
|
10008 |
<td><xsl:value-of select="@files_count"/></td>
|
|
|
10009 |
<td><xsl:value-of select="@blank"/></td>
|
|
|
10010 |
<td><xsl:value-of select="@comment"/></td>
|
|
|
10011 |
<td><xsl:value-of select="@code"/></td>
|
|
|
10012 |
</tr>
|
|
|
10013 |
</xsl:for-each>
|
|
|
10014 |
</tbody>
|
|
|
10015 |
</table>
|
|
|
10016 |
|
|
|
10017 |
<table>
|
|
|
10018 |
<thead>
|
|
|
10019 |
<tr><th colspan="5">Added</th>
|
|
|
10020 |
</tr>
|
|
|
10021 |
<tr>
|
|
|
10022 |
<th>Language</th>
|
|
|
10023 |
<th>Files</th>
|
|
|
10024 |
<th>Blank</th>
|
|
|
10025 |
<th>Comment</th>
|
|
|
10026 |
<th>Code</th>
|
|
|
10027 |
</tr>
|
|
|
10028 |
</thead>
|
|
|
10029 |
<tbody>
|
|
|
10030 |
<xsl:for-each select="diff_results/added/language">
|
|
|
10031 |
<tr>
|
|
|
10032 |
<th><xsl:value-of select="@name"/></th>
|
|
|
10033 |
<td><xsl:value-of select="@files_count"/></td>
|
|
|
10034 |
<td><xsl:value-of select="@blank"/></td>
|
|
|
10035 |
<td><xsl:value-of select="@comment"/></td>
|
|
|
10036 |
<td><xsl:value-of select="@code"/></td>
|
|
|
10037 |
</tr>
|
|
|
10038 |
</xsl:for-each>
|
|
|
10039 |
</tbody>
|
|
|
10040 |
</table>
|
|
|
10041 |
|
|
|
10042 |
<table>
|
|
|
10043 |
<thead>
|
|
|
10044 |
<tr><th colspan="5">Removed</th>
|
|
|
10045 |
</tr>
|
|
|
10046 |
<tr>
|
|
|
10047 |
<th>Language</th>
|
|
|
10048 |
<th>Files</th>
|
|
|
10049 |
<th>Blank</th>
|
|
|
10050 |
<th>Comment</th>
|
|
|
10051 |
<th>Code</th>
|
|
|
10052 |
</tr>
|
|
|
10053 |
</thead>
|
|
|
10054 |
<tbody>
|
|
|
10055 |
<xsl:for-each select="diff_results/removed/language">
|
|
|
10056 |
<tr>
|
|
|
10057 |
<th><xsl:value-of select="@name"/></th>
|
|
|
10058 |
<td><xsl:value-of select="@files_count"/></td>
|
|
|
10059 |
<td><xsl:value-of select="@blank"/></td>
|
|
|
10060 |
<td><xsl:value-of select="@comment"/></td>
|
|
|
10061 |
<td><xsl:value-of select="@code"/></td>
|
|
|
10062 |
</tr>
|
|
|
10063 |
</xsl:for-each>
|
|
|
10064 |
</tbody>
|
|
|
10065 |
</table>
|
|
|
10066 |
EO_DIFF_XSL
|
|
|
10067 |
# 2}}}
|
|
|
10068 |
|
|
|
10069 |
}
|
|
|
10070 |
|
|
|
10071 |
$XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
|
|
|
10072 |
</body>
|
|
|
10073 |
</html>
|
|
|
10074 |
</xsl:template>
|
|
|
10075 |
</xsl:stylesheet>
|
|
|
10076 |
EO_DIFF_XSL
|
|
|
10077 |
# 2}}}
|
|
|
10078 |
if ($opt_diff) {
|
|
|
10079 |
print $OUT $XSL_DIFF;
|
|
|
10080 |
} else {
|
|
|
10081 |
print $OUT $XSL;
|
|
|
10082 |
}
|
|
|
10083 |
$OUT->close();
|
|
|
10084 |
} # 1}}}
|
|
|
10085 |
sub normalize_file_names { # {{{1
|
|
|
10086 |
my (@files, ) = @_;
|
|
|
10087 |
|
|
|
10088 |
# Returns a hash of file names reduced to a canonical form
|
|
|
10089 |
# (fully qualified file names, all path separators changed to /,
|
|
|
10090 |
# Windows file names lowercased). Hash values are the original
|
|
|
10091 |
# file name.
|
|
|
10092 |
|
|
|
10093 |
my %normalized = ();
|
|
|
10094 |
foreach my $F (@files) {
|
|
|
10095 |
my $F_norm = $F;
|
|
|
10096 |
if ($ON_WINDOWS) {
|
|
|
10097 |
$F_norm = lc $F_norm; # for case insensitive file name comparisons
|
|
|
10098 |
$F_norm =~ s{\\}{/}g; # Windows directory separators to Unix
|
|
|
10099 |
$F_norm =~ s{^\./}{}g; # remove leading ./
|
|
|
10100 |
if (($F_norm !~ m{^/}) and ($F_norm !~ m{^\w:/})) {
|
|
|
10101 |
# looks like a relative path; prefix with cwd
|
|
|
10102 |
$F_norm = lc "$cwd/$F_norm";
|
|
|
10103 |
}
|
|
|
10104 |
} else {
|
|
|
10105 |
$F_norm =~ s{^\./}{}g; # remove leading ./
|
|
|
10106 |
if ($F_norm !~ m{^/}) {
|
|
|
10107 |
# looks like a relative path; prefix with cwd
|
|
|
10108 |
$F_norm = lc "$cwd/$F_norm";
|
|
|
10109 |
}
|
|
|
10110 |
}
|
|
|
10111 |
$normalized{ $F_norm } = $F;
|
|
|
10112 |
}
|
|
|
10113 |
return %normalized;
|
|
|
10114 |
} # 1}}}
|
|
|
10115 |
sub combine_diffs { # {{{1
|
|
|
10116 |
# subroutine by Andy (awalshe@sf.net)
|
|
|
10117 |
# https://sourceforge.net/tracker/?func=detail&aid=3261017&group_id=174787&atid=870625
|
|
|
10118 |
my ($ra_files) = @_;
|
|
|
10119 |
|
|
|
10120 |
my $res = "$URL v $VERSION\n";
|
|
|
10121 |
my $dl = '-';
|
|
|
10122 |
my $width = 79;
|
|
|
10123 |
# columns are in this order
|
|
|
10124 |
my @cols = ('files', 'blank', 'comment', 'code');
|
|
|
10125 |
my %HoH = ();
|
|
|
10126 |
|
|
|
10127 |
foreach my $file (@{$ra_files}) {
|
|
|
10128 |
my $IN = new IO::File $file, "r";
|
|
|
10129 |
if (!defined $IN) {
|
|
|
10130 |
warn "Unable to read $file; ignoring.\n";
|
|
|
10131 |
next;
|
|
|
10132 |
}
|
|
|
10133 |
|
|
|
10134 |
my $sec;
|
|
|
10135 |
while (<$IN>) {
|
|
|
10136 |
chomp;
|
|
|
10137 |
s/\cM$//;
|
|
|
10138 |
next if /^(http|Language|-----)/;
|
|
|
10139 |
if (/^[A-Za-z0-9]+/) { # section title
|
|
|
10140 |
$sec = $_;
|
|
|
10141 |
chomp($sec);
|
|
|
10142 |
$HoH{$sec} = () if ! exists $HoH{$sec};
|
|
|
10143 |
next;
|
|
|
10144 |
}
|
|
|
10145 |
|
|
|
10146 |
if (/^\s(same|modified|added|removed)/) { # calculated totals row
|
|
|
10147 |
my @ar = grep { $_ ne '' } split(/ /, $_);
|
|
|
10148 |
chomp(@ar);
|
|
|
10149 |
my $ttl = shift @ar;
|
|
|
10150 |
my $i = 0;
|
|
|
10151 |
foreach(@ar) {
|
|
|
10152 |
my $t = "${ttl}${dl}${cols[$i]}";
|
|
|
10153 |
$HoH{$sec}{$t} = 0 if ! exists $HoH{$sec}{$t};
|
|
|
10154 |
$HoH{$sec}{$t} += $_;
|
|
|
10155 |
$i++;
|
|
|
10156 |
}
|
|
|
10157 |
}
|
|
|
10158 |
}
|
|
|
10159 |
$IN->close;
|
|
|
10160 |
}
|
|
|
10161 |
|
|
|
10162 |
# rows are in this order
|
|
|
10163 |
my @rows = ('same', 'modified', 'added', 'removed');
|
|
|
10164 |
|
|
|
10165 |
$res .= sprintf("%s\n", "-" x $width);
|
|
|
10166 |
$res .= sprintf("%-19s %14s %14s %14s %14s\n", 'Language',
|
|
|
10167 |
$cols[0], $cols[1], $cols[2], $cols[3]);
|
|
|
10168 |
$res .= sprintf("%s\n", "-" x $width);
|
|
|
10169 |
|
|
|
10170 |
for my $sec ( keys %HoH ) {
|
|
|
10171 |
next if $sec =~ /SUM:/;
|
|
|
10172 |
$res .= "$sec\n";
|
|
|
10173 |
foreach (@rows) {
|
|
|
10174 |
$res .= sprintf(" %-18s %14s %14s %14s %14s\n",
|
|
|
10175 |
$_, $HoH{$sec}{"${_}${dl}${cols[0]}"},
|
|
|
10176 |
$HoH{$sec}{"${_}${dl}${cols[1]}"},
|
|
|
10177 |
$HoH{$sec}{"${_}${dl}${cols[2]}"},
|
|
|
10178 |
$HoH{$sec}{"${_}${dl}${cols[3]}"});
|
|
|
10179 |
}
|
|
|
10180 |
}
|
|
|
10181 |
$res .= sprintf("%s\n", "-" x $width);
|
|
|
10182 |
my $sec = 'SUM:';
|
|
|
10183 |
$res .= "$sec\n";
|
|
|
10184 |
foreach (@rows) {
|
|
|
10185 |
$res .= sprintf(" %-18s %14s %14s %14s %14s\n",
|
|
|
10186 |
$_, $HoH{$sec}{"${_}${dl}${cols[0]}"},
|
|
|
10187 |
$HoH{$sec}{"${_}${dl}${cols[1]}"},
|
|
|
10188 |
$HoH{$sec}{"${_}${dl}${cols[2]}"},
|
|
|
10189 |
$HoH{$sec}{"${_}${dl}${cols[3]}"});
|
|
|
10190 |
}
|
|
|
10191 |
$res .= sprintf("%s\n", "-" x $width);
|
|
|
10192 |
|
|
|
10193 |
return $res;
|
|
|
10194 |
} # 1}}}
|
|
|
10195 |
sub get_time { # {{{1
|
|
|
10196 |
if ($HAVE_Time_HiRes) {
|
|
|
10197 |
return Time::HiRes::time();
|
|
|
10198 |
} else {
|
|
|
10199 |
return time();
|
|
|
10200 |
}
|
|
|
10201 |
} # 1}}}
|
|
|
10202 |
sub really_is_D { # {{{1
|
|
|
10203 |
# Ref bug 131, files ending with .d could be init.d scripts
|
|
|
10204 |
# instead of D language source files.
|
|
|
10205 |
my ($file , # in
|
|
|
10206 |
$rh_Err , # in hash of error codes
|
|
|
10207 |
$raa_errors , # out
|
|
|
10208 |
) = @_;
|
|
|
10209 |
print "-> really_is_D($file)\n" if $opt_v > 2;
|
|
|
10210 |
my $possible_script = peek_at_first_line($file, $rh_Err, $raa_errors);
|
|
|
10211 |
|
|
|
10212 |
print "<- really_is_D($file)\n" if $opt_v > 2;
|
|
|
10213 |
return $possible_script; # null string if D, otherwise a language
|
|
|
10214 |
} # 1}}}
|
|
|
10215 |
# subroutines copied from SLOCCount
|
|
|
10216 |
my %lex_files = (); # really_is_lex()
|
|
|
10217 |
my %expect_files = (); # really_is_expect()
|
|
|
10218 |
my %php_files = (); # really_is_php()
|
|
|
10219 |
sub really_is_lex { # {{{1
|
|
|
10220 |
# Given filename, returns TRUE if its contents really is lex.
|
|
|
10221 |
# lex file must have "%%", "%{", and "%}".
|
|
|
10222 |
# In theory, a lex file doesn't need "%{" and "%}", but in practice
|
|
|
10223 |
# they all have them, and requiring them avoid mislabeling a
|
|
|
10224 |
# non-lexfile as a lex file.
|
|
|
10225 |
|
|
|
10226 |
my $filename = shift;
|
|
|
10227 |
chomp($filename);
|
|
|
10228 |
|
|
|
10229 |
my $is_lex = 0; # Value to determine.
|
|
|
10230 |
my $percent_percent = 0;
|
|
|
10231 |
my $percent_opencurly = 0;
|
|
|
10232 |
my $percent_closecurly = 0;
|
|
|
10233 |
|
|
|
10234 |
# Return cached result, if available:
|
|
|
10235 |
if ($lex_files{$filename}) { return $lex_files{$filename};}
|
|
|
10236 |
|
|
|
10237 |
open(LEX_FILE, "<$filename") ||
|
|
|
10238 |
die "Can't open $filename to determine if it's lex.\n";
|
|
|
10239 |
while(<LEX_FILE>) {
|
|
|
10240 |
$percent_percent++ if (m/^\s*\%\%/);
|
|
|
10241 |
$percent_opencurly++ if (m/^\s*\%\{/);
|
|
|
10242 |
$percent_closecurly++ if (m/^\s*\%\}/);
|
|
|
10243 |
}
|
|
|
10244 |
close(LEX_FILE);
|
|
|
10245 |
|
|
|
10246 |
if ($percent_percent && $percent_opencurly && $percent_closecurly)
|
|
|
10247 |
{$is_lex = 1;}
|
|
|
10248 |
|
|
|
10249 |
$lex_files{$filename} = $is_lex; # Store result in cache.
|
|
|
10250 |
|
|
|
10251 |
return $is_lex;
|
|
|
10252 |
} # 1}}}
|
|
|
10253 |
sub really_is_expect { # {{{1
|
|
|
10254 |
# Given filename, returns TRUE if its contents really are Expect.
|
|
|
10255 |
# Many "exp" files (such as in Apache and Mesa) are just "export" data,
|
|
|
10256 |
# summarizing something else # (e.g., its interface).
|
|
|
10257 |
# Sometimes (like in RPM) it's just misc. data.
|
|
|
10258 |
# Thus, we need to look at the file to determine
|
|
|
10259 |
# if it's really an "expect" file.
|
|
|
10260 |
|
|
|
10261 |
my $filename = shift;
|
|
|
10262 |
chomp($filename);
|
|
|
10263 |
|
|
|
10264 |
# The heuristic is as follows: it's Expect _IF_ it:
|
|
|
10265 |
# 1. has "load_lib" command and either "#" comments or {}.
|
|
|
10266 |
# 2. {, }, and one of: proc, if, [...], expect
|
|
|
10267 |
|
|
|
10268 |
my $is_expect = 0; # Value to determine.
|
|
|
10269 |
|
|
|
10270 |
my $begin_brace = 0; # Lines that begin with curly braces.
|
|
|
10271 |
my $end_brace = 0; # Lines that begin with curly braces.
|
|
|
10272 |
my $load_lib = 0; # Lines with the Load_lib command.
|
|
|
10273 |
my $found_proc = 0;
|
|
|
10274 |
my $found_if = 0;
|
|
|
10275 |
my $found_brackets = 0;
|
|
|
10276 |
my $found_expect = 0;
|
|
|
10277 |
my $found_pound = 0;
|
|
|
10278 |
|
|
|
10279 |
# Return cached result, if available:
|
|
|
10280 |
if ($expect_files{$filename}) { return expect_files{$filename};}
|
|
|
10281 |
|
|
|
10282 |
open(EXPECT_FILE, "<$filename") ||
|
|
|
10283 |
die "Can't open $filename to determine if it's expect.\n";
|
|
|
10284 |
while(<EXPECT_FILE>) {
|
|
|
10285 |
|
|
|
10286 |
if (m/#/) {$found_pound++; s/#.*//;}
|
|
|
10287 |
if (m/^\s*\{/) { $begin_brace++;}
|
|
|
10288 |
if (m/\{\s*$/) { $begin_brace++;}
|
|
|
10289 |
if (m/^\s*\}/) { $end_brace++;}
|
|
|
10290 |
if (m/\};?\s*$/) { $end_brace++;}
|
|
|
10291 |
if (m/^\s*load_lib\s+\S/) { $load_lib++;}
|
|
|
10292 |
if (m/^\s*proc\s/) { $found_proc++;}
|
|
|
10293 |
if (m/^\s*if\s/) { $found_if++;}
|
|
|
10294 |
if (m/\[.*\]/) { $found_brackets++;}
|
|
|
10295 |
if (m/^\s*expect\s/) { $found_expect++;}
|
|
|
10296 |
}
|
|
|
10297 |
close(EXPECT_FILE);
|
|
|
10298 |
|
|
|
10299 |
if ($load_lib && ($found_pound || ($begin_brace && $end_brace)))
|
|
|
10300 |
{$is_expect = 1;}
|
|
|
10301 |
if ( $begin_brace && $end_brace &&
|
|
|
10302 |
($found_proc || $found_if || $found_brackets || $found_expect))
|
|
|
10303 |
{$is_expect = 1;}
|
|
|
10304 |
|
|
|
10305 |
$expect_files{$filename} = $is_expect; # Store result in cache.
|
|
|
10306 |
|
|
|
10307 |
return $is_expect;
|
|
|
10308 |
} # 1}}}
|
|
|
10309 |
sub really_is_pascal { # {{{1
|
|
|
10310 |
# Given filename, returns TRUE if its contents really are Pascal.
|
|
|
10311 |
|
|
|
10312 |
# This isn't as obvious as it seems.
|
|
|
10313 |
# Many ".p" files are Perl files
|
|
|
10314 |
# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p),
|
|
|
10315 |
# others are C extractions
|
|
|
10316 |
# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p
|
|
|
10317 |
# and some files in linuxconf).
|
|
|
10318 |
# However, test files in "p2c" really are Pascal, for example.
|
|
|
10319 |
|
|
|
10320 |
# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p
|
|
|
10321 |
# is actually C code. The heuristics determine that they're not Pascal,
|
|
|
10322 |
# but because it ends in ".p" it's not counted as C code either.
|
|
|
10323 |
# I believe this is actually correct behavior, because frankly it
|
|
|
10324 |
# looks like it's automatically generated (it's a bitmap expressed as code).
|
|
|
10325 |
# Rather than guess otherwise, we don't include it in a list of
|
|
|
10326 |
# source files. Let's face it, someone who creates C files ending in ".p"
|
|
|
10327 |
# and expects them to be counted by default as C files in SLOCCount needs
|
|
|
10328 |
# their head examined. I suggest examining their head
|
|
|
10329 |
# with a sucker rod (see syslogd(8) for more on sucker rods).
|
|
|
10330 |
|
|
|
10331 |
# This heuristic counts as Pascal such files such as:
|
|
|
10332 |
# /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p
|
|
|
10333 |
# Which is hand-generated. We don't count woven documents now anyway,
|
|
|
10334 |
# so this is justifiable.
|
|
|
10335 |
|
|
|
10336 |
my $filename = shift;
|
|
|
10337 |
chomp($filename);
|
|
|
10338 |
|
|
|
10339 |
# The heuristic is as follows: it's Pascal _IF_ it has all of the following
|
|
|
10340 |
# (ignoring {...} and (*...*) comments):
|
|
|
10341 |
# 1. "^..program NAME" or "^..unit NAME",
|
|
|
10342 |
# 2. "procedure", "function", "^..interface", or "^..implementation",
|
|
|
10343 |
# 3. a "begin", and
|
|
|
10344 |
# 4. it ends with "end.",
|
|
|
10345 |
#
|
|
|
10346 |
# Or it has all of the following:
|
|
|
10347 |
# 1. "^..module NAME" and
|
|
|
10348 |
# 2. it ends with "end.".
|
|
|
10349 |
#
|
|
|
10350 |
# Or it has all of the following:
|
|
|
10351 |
# 1. "^..program NAME",
|
|
|
10352 |
# 2. a "begin", and
|
|
|
10353 |
# 3. it ends with "end.".
|
|
|
10354 |
#
|
|
|
10355 |
# The "end." requirements in particular filter out non-Pascal.
|
|
|
10356 |
#
|
|
|
10357 |
# Note (jgb): this does not detect Pascal main files in fpc, like
|
|
|
10358 |
# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in
|
|
|
10359 |
# it
|
|
|
10360 |
|
|
|
10361 |
my $is_pascal = 0; # Value to determine.
|
|
|
10362 |
|
|
|
10363 |
my $has_program = 0;
|
|
|
10364 |
my $has_unit = 0;
|
|
|
10365 |
my $has_module = 0;
|
|
|
10366 |
my $has_procedure_or_function = 0;
|
|
|
10367 |
my $found_begin = 0;
|
|
|
10368 |
my $found_terminating_end = 0;
|
|
|
10369 |
my $has_begin = 0;
|
|
|
10370 |
|
|
|
10371 |
open(PASCAL_FILE, "<$filename") ||
|
|
|
10372 |
die "Can't open $filename to determine if it's pascal.\n";
|
|
|
10373 |
while(<PASCAL_FILE>) {
|
|
|
10374 |
s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective.
|
|
|
10375 |
s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective.
|
|
|
10376 |
if (m/\bprogram\s+[A-Za-z]/i) {$has_program=1;}
|
|
|
10377 |
if (m/\bunit\s+[A-Za-z]/i) {$has_unit=1;}
|
|
|
10378 |
if (m/\bmodule\s+[A-Za-z]/i) {$has_module=1;}
|
|
|
10379 |
if (m/\bprocedure\b/i) { $has_procedure_or_function = 1; }
|
|
|
10380 |
if (m/\bfunction\b/i) { $has_procedure_or_function = 1; }
|
|
|
10381 |
if (m/^\s*interface\s+/i) { $has_procedure_or_function = 1; }
|
|
|
10382 |
if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; }
|
|
|
10383 |
if (m/\bbegin\b/i) { $has_begin = 1; }
|
|
|
10384 |
# Originally I said:
|
|
|
10385 |
# "This heuristic fails if there are multi-line comments after
|
|
|
10386 |
# "end."; I haven't seen that in real Pascal programs:"
|
|
|
10387 |
# But jgb found there are a good quantity of them in Debian, specially in
|
|
|
10388 |
# fpc (at the end of a lot of files there is a multiline comment
|
|
|
10389 |
# with the changelog for the file).
|
|
|
10390 |
# Therefore, assume Pascal if "end." appears anywhere in the file.
|
|
|
10391 |
if (m/end\.\s*$/i) {$found_terminating_end = 1;}
|
|
|
10392 |
# elsif (m/\S/) {$found_terminating_end = 0;}
|
|
|
10393 |
}
|
|
|
10394 |
close(PASCAL_FILE);
|
|
|
10395 |
|
|
|
10396 |
# Okay, we've examined the entire file looking for clues;
|
|
|
10397 |
# let's use those clues to determine if it's really Pascal:
|
|
|
10398 |
|
|
|
10399 |
if ( ( ($has_unit || $has_program) && $has_procedure_or_function &&
|
|
|
10400 |
$has_begin && $found_terminating_end ) ||
|
|
|
10401 |
( $has_module && $found_terminating_end ) ||
|
|
|
10402 |
( $has_program && $has_begin && $found_terminating_end ) )
|
|
|
10403 |
{$is_pascal = 1;}
|
|
|
10404 |
|
|
|
10405 |
return $is_pascal;
|
|
|
10406 |
} # 1}}}
|
|
|
10407 |
sub really_is_incpascal { # {{{1
|
|
|
10408 |
# Given filename, returns TRUE if its contents really are Pascal.
|
|
|
10409 |
# For .inc files (mainly seen in fpc)
|
|
|
10410 |
|
|
|
10411 |
my $filename = shift;
|
|
|
10412 |
chomp($filename);
|
|
|
10413 |
|
|
|
10414 |
# The heuristic is as follows: it is Pacal if any of the following:
|
|
|
10415 |
# 1. really_is_pascal returns true
|
|
|
10416 |
# 2. Any usual reserverd word is found (program, unit, const, begin...)
|
|
|
10417 |
|
|
|
10418 |
# If the general routine for Pascal files works, we have it
|
|
|
10419 |
if (really_is_pascal($filename)) {
|
|
|
10420 |
return 1;
|
|
|
10421 |
}
|
|
|
10422 |
|
|
|
10423 |
my $is_pascal = 0; # Value to determine.
|
|
|
10424 |
my $found_begin = 0;
|
|
|
10425 |
|
|
|
10426 |
open(PASCAL_FILE, "<$filename") ||
|
|
|
10427 |
die "Can't open $filename to determine if it's pascal.\n";
|
|
|
10428 |
while(<PASCAL_FILE>) {
|
|
|
10429 |
s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective.
|
|
|
10430 |
s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective.
|
|
|
10431 |
if (m/\bprogram\s+[A-Za-z]/i) {$is_pascal=1;}
|
|
|
10432 |
if (m/\bunit\s+[A-Za-z]/i) {$is_pascal=1;}
|
|
|
10433 |
if (m/\bmodule\s+[A-Za-z]/i) {$is_pascal=1;}
|
|
|
10434 |
if (m/\bprocedure\b/i) {$is_pascal = 1; }
|
|
|
10435 |
if (m/\bfunction\b/i) {$is_pascal = 1; }
|
|
|
10436 |
if (m/^\s*interface\s+/i) {$is_pascal = 1; }
|
|
|
10437 |
if (m/^\s*implementation\s+/i) {$is_pascal = 1; }
|
|
|
10438 |
if (m/\bconstant\s+/i) {$is_pascal=1;}
|
|
|
10439 |
if (m/\bbegin\b/i) { $found_begin = 1; }
|
|
|
10440 |
if ((m/end\.\s*$/i) && ($found_begin = 1)) {$is_pascal = 1;}
|
|
|
10441 |
if ($is_pascal) {
|
|
|
10442 |
last;
|
|
|
10443 |
}
|
|
|
10444 |
}
|
|
|
10445 |
|
|
|
10446 |
close(PASCAL_FILE);
|
|
|
10447 |
return $is_pascal;
|
|
|
10448 |
} # 1}}}
|
|
|
10449 |
sub really_is_php { # {{{1
|
|
|
10450 |
# Given filename, returns TRUE if its contents really is php.
|
|
|
10451 |
|
|
|
10452 |
my $filename = shift;
|
|
|
10453 |
chomp($filename);
|
|
|
10454 |
|
|
|
10455 |
my $is_php = 0; # Value to determine.
|
|
|
10456 |
# Need to find a matching pair of surrounds, with ending after beginning:
|
|
|
10457 |
my $normal_surround = 0; # <?; bit 0 = <?, bit 1 = ?>
|
|
|
10458 |
my $script_surround = 0; # <script..>; bit 0 = <script language="php">
|
|
|
10459 |
my $asp_surround = 0; # <%; bit 0 = <%, bit 1 = %>
|
|
|
10460 |
|
|
|
10461 |
# Return cached result, if available:
|
|
|
10462 |
if ($php_files{$filename}) { return $php_files{$filename};}
|
|
|
10463 |
|
|
|
10464 |
open(PHP_FILE, "<$filename") ||
|
|
|
10465 |
die "Can't open $filename to determine if it's php.\n";
|
|
|
10466 |
while(<PHP_FILE>) {
|
|
|
10467 |
if (m/\<\?/) { $normal_surround |= 1; }
|
|
|
10468 |
if (m/\?\>/ && ($normal_surround & 1)) { $normal_surround |= 2; }
|
|
|
10469 |
if (m/\<script.*language="?php"?/i) { $script_surround |= 1; }
|
|
|
10470 |
if (m/\<\/script\>/i && ($script_surround & 1)) { $script_surround |= 2; }
|
|
|
10471 |
if (m/\<\%/) { $asp_surround |= 1; }
|
|
|
10472 |
if (m/\%\>/ && ($asp_surround & 1)) { $asp_surround |= 2; }
|
|
|
10473 |
}
|
|
|
10474 |
close(PHP_FILE);
|
|
|
10475 |
|
|
|
10476 |
if ( ($normal_surround == 3) || ($script_surround == 3) ||
|
|
|
10477 |
($asp_surround == 3)) {
|
|
|
10478 |
$is_php = 1;
|
|
|
10479 |
}
|
|
|
10480 |
|
|
|
10481 |
$php_files{$filename} = $is_php; # Store result in cache.
|
|
|
10482 |
|
|
|
10483 |
return $is_php;
|
|
|
10484 |
} # 1}}}
|
|
|
10485 |
__END__
|
|
|
10486 |
mode values (stat $item)[2]
|
|
|
10487 |
Unix Windows
|
|
|
10488 |
file: 33188 33206
|
|
|
10489 |
dir : 16832 16895
|
|
|
10490 |
link: 33261 33206
|
|
|
10491 |
pipe: 4544 null
|