| 229 |
dpurdie |
1 |
#!/usr/bin/env perl
|
|
|
2 |
# cloc -- Count Lines of Code {{{1
|
|
|
3 |
# Copyright (C) 2006 Northrop Grumman Corporation
|
|
|
4 |
# Author: Al Danial <al.danial@gmail.com>
|
|
|
5 |
# First release August 2006
|
|
|
6 |
#
|
|
|
7 |
# Includes code from:
|
|
|
8 |
# - SLOCCount v2.26
|
|
|
9 |
# http://www.dwheeler.com/sloccount/
|
|
|
10 |
# by David Wheeler.
|
|
|
11 |
# - Regexp::Common v2.120
|
|
|
12 |
# http://search.cpan.org/~abigail/Regexp-Common-2.120/lib/Regexp/Common.pm
|
|
|
13 |
# by Damian Conway and Abigail
|
|
|
14 |
#
|
|
|
15 |
# This program is free software; you can redistribute it and/or modify
|
|
|
16 |
# it under the terms of the GNU General Public License as published by
|
|
|
17 |
# the Free Software Foundation; either version 2 of the License, or
|
|
|
18 |
# (at your option) any later version.
|
|
|
19 |
#
|
|
|
20 |
# This program is distributed in the hope that it will be useful,
|
|
|
21 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
22 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
23 |
# GNU General Public License for more details:
|
|
|
24 |
# http://www.gnu.org/licenses/gpl.txt
|
|
|
25 |
#
|
|
|
26 |
# You should have received a copy of the GNU General Public License
|
|
|
27 |
# along with this program; if not, write to the Free Software
|
|
|
28 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
29 |
# 1}}}
|
|
|
30 |
my $VERSION = 1.00;
|
|
|
31 |
require 5.006;
|
|
|
32 |
# use modules {{{1
|
|
|
33 |
use warnings;
|
|
|
34 |
use strict;
|
|
|
35 |
use Getopt::Long;
|
|
|
36 |
use File::Basename;
|
|
|
37 |
use File::Temp qw { tempfile tempdir };
|
|
|
38 |
use File::Find;
|
|
|
39 |
use File::Path;
|
|
|
40 |
use IO::File;
|
|
|
41 |
|
|
|
42 |
# Digest::MD5 isn't in the standard distribution. Use it only if installed.
|
|
|
43 |
my $HAVE_Digest_MD5 = 0;
|
|
|
44 |
eval "use Digest::MD5;";
|
|
|
45 |
if (defined $Digest::MD5::VERSION) {
|
|
|
46 |
$HAVE_Digest_MD5 = 1;
|
|
|
47 |
} else {
|
|
|
48 |
warn "Digest::MD5 not installed; will skip file uniqueness checks.\n";
|
|
|
49 |
}
|
|
|
50 |
|
|
|
51 |
my $HAVE_Rexexp_Common;
|
|
|
52 |
# Regexp::Common also isn't in the standard distribution. It will
|
|
|
53 |
# be installed in a temp directory if necessary.
|
|
|
54 |
BEGIN {
|
|
|
55 |
if (eval "use Regexp::Common;") {
|
|
|
56 |
$HAVE_Rexexp_Common = 1;
|
|
|
57 |
} else {
|
|
|
58 |
$HAVE_Rexexp_Common = 0;
|
|
|
59 |
}
|
|
|
60 |
}
|
|
|
61 |
|
|
|
62 |
# Uncomment next two lines when building Windows executable with perl2exe
|
|
|
63 |
# or if running on a system that already has Regexp::Common.
|
|
|
64 |
#use Regexp::Common;
|
|
|
65 |
#$HAVE_Rexexp_Common = 1;
|
|
|
66 |
|
|
|
67 |
#perl2exe_include "Regexp/Common/whitespace.pm"
|
|
|
68 |
#perl2exe_include "Regexp/Common/URI.pm"
|
|
|
69 |
#perl2exe_include "Regexp/Common/URI/fax.pm"
|
|
|
70 |
#perl2exe_include "Regexp/Common/URI/file.pm"
|
|
|
71 |
#perl2exe_include "Regexp/Common/URI/ftp.pm"
|
|
|
72 |
#perl2exe_include "Regexp/Common/URI/gopher.pm"
|
|
|
73 |
#perl2exe_include "Regexp/Common/URI/http.pm"
|
|
|
74 |
#perl2exe_include "Regexp/Common/URI/pop.pm"
|
|
|
75 |
#perl2exe_include "Regexp/Common/URI/prospero.pm"
|
|
|
76 |
#perl2exe_include "Regexp/Common/URI/news.pm"
|
|
|
77 |
#perl2exe_include "Regexp/Common/URI/tel.pm"
|
|
|
78 |
#perl2exe_include "Regexp/Common/URI/telnet.pm"
|
|
|
79 |
#perl2exe_include "Regexp/Common/URI/tv.pm"
|
|
|
80 |
#perl2exe_include "Regexp/Common/URI/wais.pm"
|
|
|
81 |
#perl2exe_include "Regexp/Common/CC.pm"
|
|
|
82 |
#perl2exe_include "Regexp/Common/SEN.pm"
|
|
|
83 |
#perl2exe_include "Regexp/Common/number.pm"
|
|
|
84 |
#perl2exe_include "Regexp/Common/delimited.pm"
|
|
|
85 |
#perl2exe_include "Regexp/Common/profanity.pm"
|
|
|
86 |
#perl2exe_include "Regexp/Common/net.pm"
|
|
|
87 |
#perl2exe_include "Regexp/Common/zip.pm"
|
|
|
88 |
#perl2exe_include "Regexp/Common/comment.pm"
|
|
|
89 |
#perl2exe_include "Regexp/Common/balanced.pm"
|
|
|
90 |
#perl2exe_include "Regexp/Common/lingua.pm"
|
|
|
91 |
#perl2exe_include "Regexp/Common/list.pm"
|
|
|
92 |
#perl2exe_include "File/Glob.pm"
|
|
|
93 |
|
|
|
94 |
use Text::Tabs qw { expand };
|
|
|
95 |
#use Data::Dumper::Simple;
|
|
|
96 |
#use Data::Dumper;
|
|
|
97 |
use Cwd qw { cwd };
|
|
|
98 |
# 1}}}
|
|
|
99 |
# Usage information, options processing. {{{1
|
|
|
100 |
my $script = basename $0;
|
|
|
101 |
my $usage = "
|
|
|
102 |
Usage: $script [options] <file(s)/dir(s)> | <report files>
|
|
|
103 |
|
|
|
104 |
Count physical lines of source code in the given files and/or
|
|
|
105 |
recursively below the given directories.
|
|
|
106 |
|
|
|
107 |
Options:
|
|
|
108 |
--by-file Report results for every source file encountered
|
|
|
109 |
in addition to reporting by language.
|
|
|
110 |
--categorized=<file> Save names of categorized files to <file>.
|
|
|
111 |
--counted=<file> Save names of processed source files to <file>.
|
|
|
112 |
--exclude-dir=<D1>[,D2,] Exclude the given comma separated directories
|
|
|
113 |
D1, D2, D3, et cetera, from being scanned. For
|
|
|
114 |
example --exclude-dir=.cvs,.svn will skip
|
|
|
115 |
all files that have /.cvs/ or /.svn/ as part of
|
|
|
116 |
their path.
|
|
|
117 |
--exclude-lang=<L1>[,L2,] Exclude the given comma separated languages
|
|
|
118 |
L1, L2, L3, et cetera, from being counted.
|
|
|
119 |
--extract-with=<cmd> Use <cmd> to extract binary archive files (e.g.:
|
|
|
120 |
.tar.gz, .zip, .Z). Use the literal '>FILE<' as
|
|
|
121 |
a stand-in for the actual file(s) to be
|
|
|
122 |
extracted. For example, to count lines of code
|
|
|
123 |
in the input files
|
|
|
124 |
gcc-4.2.tar.gz perl-5.8.8.tar.gz
|
|
|
125 |
on Unix use
|
|
|
126 |
--extract-with='gzip -dc >FILE< | tar xfv -'
|
|
|
127 |
and on Windows use:
|
|
|
128 |
--extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\"
|
|
|
129 |
(if you have WinZip installed there).
|
|
|
130 |
--force-lang=<lang>,<ext> Process all files that have a <ext> extension
|
|
|
131 |
with the counter for language <lang>. For
|
|
|
132 |
example, to count all .f files with the
|
|
|
133 |
Fortran 90 counter (which expects files to
|
|
|
134 |
end with .f90) instead of the default Fortran 77
|
|
|
135 |
counter, use
|
|
|
136 |
--force-lang=\"Fortran 90\",f
|
|
|
137 |
The language name is case insensitive. This
|
|
|
138 |
option can be specified multiple times.
|
|
|
139 |
--found=<file> Save names of every file found to <file>.
|
|
|
140 |
--ignored=<file> Save names of ignored files and the reason they
|
|
|
141 |
were ignored to <file>.
|
|
|
142 |
--no3 Suppress third-generation language output.
|
|
|
143 |
This option can cause report summation to fail
|
|
|
144 |
if some reports were produced with this option
|
|
|
145 |
while others were produced without it.
|
|
|
146 |
--print-filter-stages Print to STDOUT processed source code before and
|
|
|
147 |
after each filter is applied.
|
|
|
148 |
--progress-rate=<n> Show progress update after every <n> files are
|
|
|
149 |
processed (default <n>=100).
|
|
|
150 |
--quiet Suppress all information messages except for
|
|
|
151 |
the final report.
|
|
|
152 |
--report-file=<file> Write the results to <file> instead of STDOUT.
|
|
|
153 |
--read-lang-def=<file> Load from <file> the language processing filters.
|
|
|
154 |
(see also --write-lang-def) then use these filters
|
|
|
155 |
instead of the built-in filters.
|
|
|
156 |
--sdir=<dir> Use <dir> as the scratch directory instead of
|
|
|
157 |
letting File::Temp chose the location. Files
|
|
|
158 |
written to this location are not removed at
|
|
|
159 |
the end of the run (as they are with File::Temp).
|
|
|
160 |
--show-ext[=<ext>] Print information about all known (or just the
|
|
|
161 |
given) file extensions and exit.
|
|
|
162 |
--show-lang[=<lang>] Print information about all known (or just the
|
|
|
163 |
given) languages and exit.
|
|
|
164 |
--strip-comments=<ext> For each file processed, write to the current
|
|
|
165 |
directory a version of the file which has blank
|
|
|
166 |
lines and comments removed. The name of each
|
|
|
167 |
stripped file is the original file name with
|
|
|
168 |
.<ext> appended to it.
|
|
|
169 |
--sum-reports Input arguments are report files previously
|
|
|
170 |
created with the --report-file option. Makes
|
|
|
171 |
a cumulative set of results containing the
|
|
|
172 |
sum of data from the individual report files.
|
|
|
173 |
--write-lang-def=<file> Writes to <file> the language processing filters
|
|
|
174 |
then exits. Useful as a first step to creating
|
|
|
175 |
custom language definitions (see --read-lang-def).
|
|
|
176 |
-v[=<n>] Verbose switch (optional numeric value).
|
|
|
177 |
--version Print the version of this program and exit.
|
|
|
178 |
--csv Write the results as comma separated values.
|
|
|
179 |
--xml Write the results in XML.
|
|
|
180 |
--yaml Write the results in YAML.
|
|
|
181 |
|
|
|
182 |
";
|
|
|
183 |
$| = 1; # flush STDOUT
|
|
|
184 |
my $start_time = time();
|
|
|
185 |
my (
|
|
|
186 |
$opt_categorized ,
|
|
|
187 |
$opt_found ,
|
|
|
188 |
@opt_force_lang ,
|
|
|
189 |
$opt_ignored ,
|
|
|
190 |
$opt_counted ,
|
|
|
191 |
$opt_show_ext ,
|
|
|
192 |
$opt_show_lang ,
|
|
|
193 |
$opt_progress_rate ,
|
|
|
194 |
$opt_print_filter_stages ,
|
|
|
195 |
$opt_v ,
|
|
|
196 |
$opt_version ,
|
|
|
197 |
$opt_exclude_lang ,
|
|
|
198 |
$opt_exclude_dir ,
|
|
|
199 |
$opt_read_lang_def ,
|
|
|
200 |
$opt_write_lang_def ,
|
|
|
201 |
$opt_strip_comments ,
|
|
|
202 |
$opt_quiet ,
|
|
|
203 |
$opt_report_file ,
|
|
|
204 |
$opt_sdir ,
|
|
|
205 |
$opt_sum_reports ,
|
|
|
206 |
$opt_no3 ,
|
|
|
207 |
$opt_extract_with ,
|
|
|
208 |
$opt_by_file ,
|
|
|
209 |
$opt_xml ,
|
|
|
210 |
$opt_yaml ,
|
|
|
211 |
$opt_csv ,
|
|
|
212 |
);
|
|
|
213 |
GetOptions(
|
|
|
214 |
"by_file" => \$opt_by_file ,
|
|
|
215 |
"by-file" => \$opt_by_file ,
|
|
|
216 |
"categorized=s" => \$opt_categorized ,
|
|
|
217 |
"counted=s" => \$opt_counted ,
|
|
|
218 |
"exclude_lang=s" => \$opt_exclude_lang ,
|
|
|
219 |
"exclude-lang=s" => \$opt_exclude_lang ,
|
|
|
220 |
"exclude_dir=s" => \$opt_exclude_dir ,
|
|
|
221 |
"exclude-dir=s" => \$opt_exclude_dir ,
|
|
|
222 |
"extract_with=s" => \$opt_extract_with ,
|
|
|
223 |
"extract-with=s" => \$opt_extract_with ,
|
|
|
224 |
"found=s" => \$opt_found ,
|
|
|
225 |
"ignored=s" => \$opt_ignored ,
|
|
|
226 |
"quiet" => \$opt_quiet ,
|
|
|
227 |
"read_lang_def=s" => \$opt_read_lang_def ,
|
|
|
228 |
"read-lang-def=s" => \$opt_read_lang_def ,
|
|
|
229 |
"show_ext:s" => \$opt_show_ext ,
|
|
|
230 |
"show-ext:s" => \$opt_show_ext ,
|
|
|
231 |
"show_lang:s" => \$opt_show_lang ,
|
|
|
232 |
"show-lang:s" => \$opt_show_lang ,
|
|
|
233 |
"progress_rate=i" => \$opt_progress_rate ,
|
|
|
234 |
"progress-rate=i" => \$opt_progress_rate ,
|
|
|
235 |
"print_filter_stages" => \$opt_print_filter_stages ,
|
|
|
236 |
"print-filter-stages" => \$opt_print_filter_stages ,
|
|
|
237 |
"report_file=s" => \$opt_report_file ,
|
|
|
238 |
"report-file=s" => \$opt_report_file ,
|
|
|
239 |
"sdir=s" => \$opt_sdir ,
|
|
|
240 |
"strip_comments=s" => \$opt_strip_comments ,
|
|
|
241 |
"strip-comments=s" => \$opt_strip_comments ,
|
|
|
242 |
"sum_reports" => \$opt_sum_reports ,
|
|
|
243 |
"sum-reports" => \$opt_sum_reports ,
|
|
|
244 |
"no3" => \$opt_no3 ,
|
|
|
245 |
"v:i" => \$opt_v ,
|
|
|
246 |
"version" => \$opt_version ,
|
|
|
247 |
"write_lang_def=s" => \$opt_write_lang_def ,
|
|
|
248 |
"write-lang-def=s" => \$opt_write_lang_def ,
|
|
|
249 |
"xml" => \$opt_xml ,
|
|
|
250 |
"force_lang=s" => \@opt_force_lang ,
|
|
|
251 |
"force-lang=s" => \@opt_force_lang ,
|
|
|
252 |
"yaml" => \$opt_yaml ,
|
|
|
253 |
"csv" => \$opt_csv ,
|
|
|
254 |
);
|
|
|
255 |
|
|
|
256 |
my %Exclude_Language = ();
|
|
|
257 |
%Exclude_Language = map { $_ => 1 } split(/,/, $opt_exclude_lang)
|
|
|
258 |
if $opt_exclude_lang;
|
|
|
259 |
my %Exclude_Dir = ();
|
|
|
260 |
%Exclude_Dir = map { $_ => 1 } split(/,/, $opt_exclude_dir )
|
|
|
261 |
if $opt_exclude_dir ;
|
|
|
262 |
# Options defaults:
|
|
|
263 |
$opt_progress_rate = 100 unless $opt_progress_rate;
|
|
|
264 |
$opt_v = 0 unless $opt_v;
|
|
|
265 |
die $usage unless defined $opt_version or
|
|
|
266 |
defined $opt_show_lang or
|
|
|
267 |
defined $opt_show_ext or
|
|
|
268 |
defined $opt_write_lang_def or
|
|
|
269 |
scalar @ARGV >= 1;
|
|
|
270 |
# 1}}}
|
|
|
271 |
# Step 1: Initialize global constants. {{{1
|
|
|
272 |
#
|
|
|
273 |
my $ON_WINDOWS = 0;
|
|
|
274 |
$ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT");
|
|
|
275 |
$ON_WINDOWS = 0 if $ENV{'SHELL'}; # make Cygwin look like Unix
|
|
|
276 |
|
|
|
277 |
my $nFiles_Found = 0; # updated in make_file_list
|
|
|
278 |
my (%Language_by_Extension, %Language_by_Script,
|
|
|
279 |
%Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename,
|
|
|
280 |
%Language_by_File, %Scale_Factor, %Known_Binary_Archives,
|
|
|
281 |
);
|
|
|
282 |
my %Error_Codes = ( 'Unable to read' => -1,
|
|
|
283 |
'Neither file nor directory' => -2, );
|
|
|
284 |
if ($opt_read_lang_def) {
|
|
|
285 |
read_lang_def(
|
|
|
286 |
$opt_read_lang_def , # Sample values:
|
|
|
287 |
\%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77'
|
|
|
288 |
\%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell'
|
|
|
289 |
\%Language_by_File , # Language_by_File{makefile} = 'make'
|
|
|
290 |
\%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] =
|
|
|
291 |
# [ 'remove_matches' , '^\s*#' ]
|
|
|
292 |
\%Not_Code_Extension , # Not_Code_Extension{jpg} = 1
|
|
|
293 |
\%Not_Code_Filename , # Not_Code_Filename{README} = 1
|
|
|
294 |
\%Scale_Factor , # Scale_Factor{Perl} = 4.0
|
|
|
295 |
);
|
|
|
296 |
} else {
|
|
|
297 |
set_constants( #
|
|
|
298 |
\%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77'
|
|
|
299 |
\%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell'
|
|
|
300 |
\%Language_by_File , # Language_by_File{makefile} = 'make'
|
|
|
301 |
\%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] =
|
|
|
302 |
# [ 'remove_matches' , '^\s*#' ]
|
|
|
303 |
\%Not_Code_Extension , # Not_Code_Extension{jpg} = 1
|
|
|
304 |
\%Not_Code_Filename , # Not_Code_Filename{README} = 1
|
|
|
305 |
\%Scale_Factor , # Scale_Factor{Perl} = 4.0
|
|
|
306 |
\%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1
|
|
|
307 |
);
|
|
|
308 |
}
|
|
|
309 |
|
|
|
310 |
# Process command line provided extention-to-language mapping overrides.
|
|
|
311 |
# Make a hash of known languages in lower case for easier matching.
|
|
|
312 |
my %Recognized_Language_lc = (); # key = language name in lc, value = true name
|
|
|
313 |
foreach my $language (keys %Filters_by_Language) {
|
|
|
314 |
my $lang_lc = lc $language;
|
|
|
315 |
$Recognized_Language_lc{$lang_lc} = $language;
|
|
|
316 |
}
|
|
|
317 |
my %Forced_Extension = (); # file name extensions which user wants to count
|
|
|
318 |
foreach my $pair (@opt_force_lang) {
|
|
|
319 |
my ($lang, $extension) = split(',', $pair);
|
|
|
320 |
$Forced_Extension{$extension} = $lang;
|
|
|
321 |
my $lang_lc = lc $lang;
|
|
|
322 |
|
|
|
323 |
die "Unknown language '$lang' used with --force-lang option. " .
|
|
|
324 |
"The command\n $script --show-lang\n" .
|
|
|
325 |
"will print all recognized languages.\n"
|
|
|
326 |
unless $Recognized_Language_lc{$lang_lc};
|
|
|
327 |
$Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc};
|
|
|
328 |
}
|
|
|
329 |
|
|
|
330 |
# 1}}}
|
|
|
331 |
# Step 2: Early exits for display, summation. {{{1
|
|
|
332 |
#
|
|
|
333 |
if ($opt_version) {
|
|
|
334 |
printf "%.2f\n", $VERSION;
|
|
|
335 |
exit;
|
|
|
336 |
}
|
|
|
337 |
print_extension_info($opt_show_ext ) if defined $opt_show_ext ;
|
|
|
338 |
print_language_info( $opt_show_lang) if defined $opt_show_lang;
|
|
|
339 |
exit if (defined $opt_show_ext) or (defined $opt_show_lang);
|
|
|
340 |
|
|
|
341 |
# Windows doesn't expand wildcards. Use code from Sean M. Burke's
|
|
|
342 |
# Win32::Autoglob module to do this.
|
|
|
343 |
#print "Before glob have [", join(",", @ARGV), "]\n";
|
|
|
344 |
@ARGV = map {;
|
|
|
345 |
( defined($_) and m/[\*\?]/ ) ? sort(glob($_)) : $_
|
|
|
346 |
} @ARGV if $ON_WINDOWS;
|
|
|
347 |
;
|
|
|
348 |
#print "after glob have [", join(",", @ARGV), "]\n";
|
|
|
349 |
|
|
|
350 |
if ($opt_sum_reports) {
|
|
|
351 |
my %Results = ();
|
|
|
352 |
foreach my $type( "by language", "by report file" ) {
|
|
|
353 |
my $found_lang = combine_results(\@ARGV,
|
|
|
354 |
$type,
|
|
|
355 |
\%{$Results{ $type }},
|
|
|
356 |
\%Filters_by_Language );
|
|
|
357 |
next unless %Results;
|
|
|
358 |
my $end_time = time();
|
|
|
359 |
my @results = generate_report($VERSION, $end_time - $start_time,
|
|
|
360 |
$type,
|
|
|
361 |
\%{$Results{ $type }}, \%Scale_Factor);
|
|
|
362 |
if ($opt_report_file) {
|
|
|
363 |
my $ext = ".lang";
|
|
|
364 |
$ext = ".file" unless $type eq "by language";
|
|
|
365 |
next if !$found_lang and $ext eq ".lang";
|
|
|
366 |
write_file($opt_report_file . $ext, @results);
|
|
|
367 |
} else {
|
|
|
368 |
print "\n", join("\n", @results), "\n";
|
|
|
369 |
}
|
|
|
370 |
}
|
|
|
371 |
exit;
|
|
|
372 |
}
|
|
|
373 |
if ($opt_write_lang_def) {
|
|
|
374 |
write_lang_def($opt_write_lang_def ,
|
|
|
375 |
\%Language_by_Extension,
|
|
|
376 |
\%Language_by_Script ,
|
|
|
377 |
\%Language_by_File ,
|
|
|
378 |
\%Filters_by_Language ,
|
|
|
379 |
\%Not_Code_Extension ,
|
|
|
380 |
\%Not_Code_Filename ,
|
|
|
381 |
\%Scale_Factor ,
|
|
|
382 |
);
|
|
|
383 |
exit;
|
|
|
384 |
}
|
|
|
385 |
# 1}}}
|
|
|
386 |
# Step 3: Create a list of files to consider. {{{1
|
|
|
387 |
# a) If inputs are binary archives, first cd to a temp
|
|
|
388 |
# directory, expand the archive with the user-given
|
|
|
389 |
# extraction tool, then add the temp directory to
|
|
|
390 |
# the list of dirs to process.
|
|
|
391 |
# b) Create a list of every file that might contain source
|
|
|
392 |
# code. Ignore binary files, zero-sized files, and
|
|
|
393 |
# any file in a directory the user says to exclude.
|
|
|
394 |
# c) Determine the language for each file in the list.
|
|
|
395 |
#
|
|
|
396 |
my @binary_archive = ();
|
|
|
397 |
if ($opt_extract_with) {
|
|
|
398 |
my $cwd = cwd();
|
|
|
399 |
#print "cwd main = [$cwd]\n";
|
|
|
400 |
my @extract_location = ();
|
|
|
401 |
foreach my $bin_file (@ARGV) {
|
|
|
402 |
my $extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
|
|
|
403 |
chdir $extract_dir;
|
|
|
404 |
print "Using temp dir [$extract_dir] to extract $bin_file\n"
|
|
|
405 |
if $opt_v;
|
|
|
406 |
my $bin_file_full_path = "";
|
|
|
407 |
if (File::Spec->file_name_is_absolute( $bin_file )) {
|
|
|
408 |
$bin_file_full_path = $bin_file;
|
|
|
409 |
#print "bin_file_full_path (was ful) = [$bin_file_full_path]\n";
|
|
|
410 |
} else {
|
|
|
411 |
$bin_file_full_path = File::Spec->catfile( $cwd, $bin_file );
|
|
|
412 |
#print "bin_file_full_path (was rel) = [$bin_file_full_path]\n";
|
|
|
413 |
}
|
|
|
414 |
(my $extract_cmd = $opt_extract_with )
|
|
|
415 |
=~ s/>FILE</$bin_file_full_path/g;
|
|
|
416 |
print $extract_cmd, "\n";
|
|
|
417 |
system $extract_cmd;
|
|
|
418 |
push @extract_location, $extract_dir;
|
|
|
419 |
chdir $cwd;
|
|
|
420 |
}
|
|
|
421 |
# It is possible that the binary archive itself contains additional
|
|
|
422 |
# files compressed the same way (true for Java .ear files). Go
|
|
|
423 |
# through all the files that were extracted, see if they are binary
|
|
|
424 |
# archives and try to extract them. Lather, rinse, repeat.
|
|
|
425 |
my $binary_archives_exist = 1;
|
|
|
426 |
my $count_binary_archives = 0;
|
|
|
427 |
my $previous_count = 0;
|
|
|
428 |
while ($binary_archives_exist) {
|
|
|
429 |
@binary_archive = ();
|
|
|
430 |
foreach my $dir (@extract_location) {
|
|
|
431 |
find(\&archive_files, $dir); # populates global @binary_archive
|
|
|
432 |
}
|
|
|
433 |
foreach my $archive (@binary_archive) {
|
|
|
434 |
(my $extract_cmd = $opt_extract_with )
|
|
|
435 |
=~ s/>FILE</$archive/g;
|
|
|
436 |
my $extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
|
|
|
437 |
chdir $extract_dir;
|
|
|
438 |
print $extract_cmd, "\n";
|
|
|
439 |
system $extract_cmd;
|
|
|
440 |
push @extract_location, $extract_dir;
|
|
|
441 |
unlink $archive; # otherwise will be extracting it forever
|
|
|
442 |
}
|
|
|
443 |
$count_binary_archives = scalar @binary_archive;
|
|
|
444 |
if ($count_binary_archives == $previous_count) {
|
|
|
445 |
$binary_archives_exist = 0;
|
|
|
446 |
}
|
|
|
447 |
$previous_count = $count_binary_archives;
|
|
|
448 |
}
|
|
|
449 |
chdir $cwd;
|
|
|
450 |
|
|
|
451 |
@ARGV = @extract_location;
|
|
|
452 |
}
|
|
|
453 |
my @Errors = ();
|
|
|
454 |
my @file_list = (); # global variable updated in files()
|
|
|
455 |
my %Ignored = (); # files that are not counted (language not recognized or
|
|
|
456 |
# problems reading the file)
|
|
|
457 |
my $fh = make_file_list(\@ARGV, \%Error_Codes, \@Errors, \%Ignored);
|
|
|
458 |
# make_file_list populates global variable @file_list via call to
|
|
|
459 |
# File::Find's find() which in turn calls files()
|
|
|
460 |
# 1}}}
|
|
|
461 |
# Step 4: Remove duplicate files. {{{1
|
|
|
462 |
#
|
|
|
463 |
my %Language = ();
|
|
|
464 |
my %unique_source_file = ();
|
|
|
465 |
remove_duplicate_files($fh, \%Language , \%unique_source_file,
|
|
|
466 |
\%Error_Codes, \@Errors , \%Ignored);
|
|
|
467 |
printf "%8d unique file%s. \n",
|
|
|
468 |
plural_form(scalar keys %unique_source_file)
|
|
|
469 |
unless $opt_quiet;
|
|
|
470 |
# 1}}}
|
|
|
471 |
# Step 5: Count code, comments, blank lines. {{{1
|
|
|
472 |
#
|
|
|
473 |
|
|
|
474 |
my %Results_by_Language = ();
|
|
|
475 |
my %Results_by_File = ();
|
|
|
476 |
my $nCounted = 0;
|
|
|
477 |
foreach my $file (sort keys %unique_source_file) {
|
|
|
478 |
++$nCounted;
|
|
|
479 |
printf "Counting: %d\r", $nCounted unless $nCounted % $opt_progress_rate;
|
|
|
480 |
next if $Ignored{$file};
|
|
|
481 |
if ($Exclude_Language{$Language{$file}}) {
|
|
|
482 |
$Ignored{$file} = "--exclude_lang=$Language{$file}";
|
|
|
483 |
next;
|
|
|
484 |
}
|
|
|
485 |
if (!defined @{$Filters_by_Language{$Language{$file}} }) {
|
|
|
486 |
if ($Language{$file} eq "(unknown)") {
|
|
|
487 |
$Ignored{$file} = "language unknown (#1)";
|
|
|
488 |
} else {
|
|
|
489 |
$Ignored{$file} = "missing Filters_by_Language{$Language{$file}}";
|
|
|
490 |
}
|
|
|
491 |
next;
|
|
|
492 |
}
|
|
|
493 |
|
|
|
494 |
my ($all_line_count,
|
|
|
495 |
$blank_count ,
|
|
|
496 |
$comment_count ,
|
|
|
497 |
) = call_counter($file, $Language{$file});
|
|
|
498 |
my $code_count = $all_line_count - $blank_count - $comment_count;
|
|
|
499 |
if ($opt_by_file) {
|
|
|
500 |
$Results_by_File{$file}{'code' } = $code_count ;
|
|
|
501 |
$Results_by_File{$file}{'blank' } = $blank_count ;
|
|
|
502 |
$Results_by_File{$file}{'comment'} = $comment_count ;
|
|
|
503 |
$Results_by_File{$file}{'lang' } = $Language{$file};
|
|
|
504 |
$Results_by_File{$file}{'nFiles' } = 1;
|
|
|
505 |
}
|
|
|
506 |
|
|
|
507 |
++$Results_by_Language{$Language{$file}}{'nFiles'};
|
|
|
508 |
$Results_by_Language{$Language{$file}}{'code'} += $code_count ;
|
|
|
509 |
$Results_by_Language{$Language{$file}}{'blank'} += $blank_count ;
|
|
|
510 |
$Results_by_Language{$Language{$file}}{'comment'} += $comment_count;
|
|
|
511 |
}
|
|
|
512 |
my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored;
|
|
|
513 |
write_file($opt_ignored, @ignored_reasons ) if $opt_ignored;
|
|
|
514 |
write_file($opt_counted, sort keys %Language) if $opt_counted;
|
|
|
515 |
# 1}}}
|
|
|
516 |
# Step 6: Print results. {{{1
|
|
|
517 |
#
|
|
|
518 |
my $end_time = time();
|
|
|
519 |
printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored)
|
|
|
520 |
unless $opt_quiet;
|
|
|
521 |
print_errors(\%Error_Codes, \@Errors) if @Errors;
|
|
|
522 |
exit unless %Results_by_Language;
|
|
|
523 |
|
|
|
524 |
#use YAML; print YAML::Dump(\%Results_by_Language); die;
|
|
|
525 |
|
|
|
526 |
my @results = ();
|
|
|
527 |
unless ($opt_by_file) {
|
|
|
528 |
@results = generate_report( $VERSION, $end_time - $start_time,
|
|
|
529 |
"by language",
|
|
|
530 |
\%Results_by_Language, \%Scale_Factor);
|
|
|
531 |
if ($opt_report_file) { write_file($opt_report_file, @results); }
|
|
|
532 |
else { print "\n", join("\n", @results), "\n"; }
|
|
|
533 |
} else {
|
|
|
534 |
@results = generate_report( $VERSION, $end_time - $start_time,
|
|
|
535 |
"by file",
|
|
|
536 |
\%Results_by_File, \%Scale_Factor);
|
|
|
537 |
if ($opt_report_file) { write_file($opt_report_file, @results); }
|
|
|
538 |
else { print "\n", join("\n", @results), "\n"; }
|
|
|
539 |
}
|
|
|
540 |
# 1}}}
|
|
|
541 |
|
|
|
542 |
sub combine_results { # {{{1
|
|
|
543 |
# returns 1 if the inputs are categorized by language
|
|
|
544 |
# 0 if no identifiable language was found
|
|
|
545 |
my ($ra_report_files, # in
|
|
|
546 |
$report_type , # in "by language" or "by report file"
|
|
|
547 |
$rhh_count , # out count{TYPE}{nFiles|code|blank|comment|scaled}
|
|
|
548 |
$rhaa_Filters_by_Language , # in
|
|
|
549 |
) = @_;
|
|
|
550 |
|
|
|
551 |
my $found_language = 0;
|
|
|
552 |
|
|
|
553 |
foreach my $file (@{$ra_report_files}) {
|
|
|
554 |
my $IN = new IO::File $file, "r";
|
|
|
555 |
if (!defined $IN) {
|
|
|
556 |
warn "Unable to read $file; ignoring.\n";
|
|
|
557 |
next;
|
|
|
558 |
}
|
|
|
559 |
while (<$IN>) {
|
|
|
560 |
next if /^(http|Language|SUM|-----)/;
|
|
|
561 |
if (m{^(.*?)\s+ # language
|
|
|
562 |
(\d+)\s+ # files
|
|
|
563 |
(\d+)\s+ # blank
|
|
|
564 |
(\d+)\s+ # comments
|
|
|
565 |
(\d+)\s+ # code
|
|
|
566 |
( # next four entries missing with -nno3
|
|
|
567 |
x\s+ # x
|
|
|
568 |
\d+\.\d+\s+ # scale
|
|
|
569 |
=\s+ # =
|
|
|
570 |
(\d+\.\d+)\s* # scaled code
|
|
|
571 |
)?
|
|
|
572 |
$}x) {
|
|
|
573 |
if ($report_type eq "by language") {
|
| 369 |
dpurdie |
574 |
next unless (%{$rhaa_Filters_by_Language->{$1}});
|
| 229 |
dpurdie |
575 |
# above test necessary to avoid trying to sum reports
|
|
|
576 |
# of reports (which have no language breakdown).
|
|
|
577 |
$found_language = 1;
|
|
|
578 |
$rhh_count->{$1 }{'nFiles' } += $2;
|
|
|
579 |
$rhh_count->{$1 }{'blank' } += $3;
|
|
|
580 |
$rhh_count->{$1 }{'comment'} += $4;
|
|
|
581 |
$rhh_count->{$1 }{'code' } += $5;
|
|
|
582 |
$rhh_count->{$1 }{'scaled' } += $7 unless $opt_no3;
|
|
|
583 |
} else {
|
|
|
584 |
$rhh_count->{$file}{'nFiles' } += $2;
|
|
|
585 |
$rhh_count->{$file}{'blank' } += $3;
|
|
|
586 |
$rhh_count->{$file}{'comment'} += $4;
|
|
|
587 |
$rhh_count->{$file}{'code' } += $5;
|
|
|
588 |
$rhh_count->{$file}{'scaled' } += $7 unless $opt_no3;
|
|
|
589 |
}
|
|
|
590 |
}
|
|
|
591 |
}
|
|
|
592 |
}
|
|
|
593 |
return $found_language;
|
|
|
594 |
|
|
|
595 |
} # 1}}}
|
|
|
596 |
sub generate_report { # {{{1
|
|
|
597 |
# returns an array of lines containing the results
|
|
|
598 |
my ($version , # in
|
|
|
599 |
$elapsed_sec, # in
|
|
|
600 |
$report_type, # in "by language" | "by report file" | "by file"
|
|
|
601 |
$rhh_count , # in count{TYPE}{nFiles|code|blank|comment|scaled}
|
|
|
602 |
$rh_scale , # in
|
|
|
603 |
) = @_;
|
|
|
604 |
|
|
|
605 |
my @results = ();
|
|
|
606 |
|
|
|
607 |
my $languages = ();
|
|
|
608 |
|
|
|
609 |
my $sum_files = 0;
|
|
|
610 |
my $sum_code = 0;
|
|
|
611 |
my $sum_blank = 0;
|
|
|
612 |
my $sum_comment = 0;
|
|
|
613 |
foreach my $language (keys %{$rhh_count}) {
|
|
|
614 |
$sum_files += $rhh_count->{$language}{'nFiles'} ;
|
|
|
615 |
$sum_blank += $rhh_count->{$language}{'blank'} ;
|
|
|
616 |
$sum_comment += $rhh_count->{$language}{'comment'};
|
|
|
617 |
$sum_code += $rhh_count->{$language}{'code'} ;
|
|
|
618 |
}
|
|
|
619 |
my $sum_lines = $sum_blank + $sum_comment + $sum_code;
|
|
|
620 |
$elapsed_sec = 0.5 unless $elapsed_sec;
|
|
|
621 |
|
|
|
622 |
my $spacing_1 = 13;
|
|
|
623 |
my $spacing_2 = 9;
|
|
|
624 |
my $spacing_3 = 17;
|
|
|
625 |
if ($opt_no3) {
|
|
|
626 |
$spacing_1 = 19;
|
|
|
627 |
$spacing_2 = 14;
|
|
|
628 |
$spacing_3 = 28;
|
|
|
629 |
}
|
|
|
630 |
my %Format = (
|
|
|
631 |
'1' => { 'xml' => 'name="%s" ',
|
|
|
632 |
'txt' => '%-23s ' ,
|
|
|
633 |
},
|
|
|
634 |
'2' => { 'xml' => 'name="%s" ',
|
|
|
635 |
'txt' => "\%-${spacing_3}s ",
|
|
|
636 |
},
|
|
|
637 |
'3' => { 'xml' => 'files_count="%d" ',
|
|
|
638 |
'txt' => '%5d ',
|
|
|
639 |
},
|
|
|
640 |
'4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
|
|
|
641 |
'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
|
|
|
642 |
},
|
|
|
643 |
'5' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
|
|
|
644 |
'txt' => ' x %6.2f = %14.2f',
|
|
|
645 |
},
|
|
|
646 |
);
|
|
|
647 |
my $Style = "txt";
|
|
|
648 |
$Style = "xml" if $opt_xml ;
|
|
|
649 |
$Style = "xml" if $opt_yaml; # not a typo; just set to anything but txt
|
|
|
650 |
$Style = "xml" if $opt_csv ; # not a typo; just set to anything but txt
|
|
|
651 |
|
|
|
652 |
my $URL = "http://cloc.sourceforge.net";
|
|
|
653 |
my $hyphen_line = sprintf "%s", '-' x 79;
|
|
|
654 |
my $data_line = "";
|
|
|
655 |
my $first_column;
|
|
|
656 |
my $BY_LANGUAGE = 0;
|
|
|
657 |
my $BY_FILE = 0;
|
|
|
658 |
if ($report_type eq "by language") {
|
|
|
659 |
$first_column = "Language";
|
|
|
660 |
$BY_LANGUAGE = 1;
|
|
|
661 |
} elsif ($report_type eq "by file") {
|
|
|
662 |
$first_column = "File";
|
|
|
663 |
$BY_FILE = 1;
|
|
|
664 |
} else {
|
|
|
665 |
$first_column = "Report File";
|
|
|
666 |
}
|
|
|
667 |
|
|
|
668 |
my $header_line = sprintf "%s v %4.2f", $URL, $version;
|
|
|
669 |
$header_line .= sprintf(" T=%.1f s (%.1f files/s, %.1f lines/s)",
|
|
|
670 |
$elapsed_sec ,
|
|
|
671 |
$sum_files/$elapsed_sec,
|
|
|
672 |
$sum_lines/$elapsed_sec) unless $opt_sum_reports;
|
|
|
673 |
if ($opt_xml) {
|
|
|
674 |
push @results, "<?xml version=\"1.0\"?>";
|
|
|
675 |
push @results, "<results>";
|
|
|
676 |
push @results, "<header>$header_line</header>";
|
|
|
677 |
} elsif ($opt_yaml) {
|
|
|
678 |
push @results, "---\n# $header_line";
|
|
|
679 |
} elsif ($opt_csv) {
|
|
|
680 |
# append the header to the end of the column headers
|
|
|
681 |
# to keep the output a bit cleaner from a spreadsheet
|
|
|
682 |
# perspective
|
|
|
683 |
} else {
|
|
|
684 |
push @results, $header_line;
|
|
|
685 |
push @results, $hyphen_line;
|
|
|
686 |
}
|
|
|
687 |
|
|
|
688 |
if ($Style eq "txt") {
|
|
|
689 |
# column headers
|
|
|
690 |
$data_line = sprintf "%-${spacing_1}s ", $first_column;
|
|
|
691 |
if ($BY_FILE) {
|
|
|
692 |
$data_line .= sprintf "%${spacing_2}s " , " " ;
|
|
|
693 |
} else {
|
|
|
694 |
$data_line .= sprintf "%${spacing_2}s " , "files";
|
|
|
695 |
}
|
|
|
696 |
$data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
|
|
|
697 |
"blank" ,
|
|
|
698 |
"comment" ,
|
|
|
699 |
"code";
|
|
|
700 |
$data_line .= sprintf " %8s %14s",
|
|
|
701 |
"scale" ,
|
|
|
702 |
"3rd gen. equiv"
|
|
|
703 |
unless $opt_no3;
|
|
|
704 |
push @results, $data_line;
|
|
|
705 |
push @results, $hyphen_line;
|
|
|
706 |
}
|
|
|
707 |
if ($opt_csv) {
|
|
|
708 |
my $header2 = " ,";
|
|
|
709 |
$header2 = " ,files" unless $BY_FILE;
|
|
|
710 |
$header2 .= ",blank,comment,code";
|
|
|
711 |
$header2 .= ",scale,3rd gen. equiv" unless $opt_no3;
|
|
|
712 |
$header2 .= ',"' . $header_line . '"';
|
|
|
713 |
push @results, $header2;
|
|
|
714 |
}
|
|
|
715 |
|
|
|
716 |
my $sum_scaled = 0;
|
|
|
717 |
foreach my $lang_or_file (sort {
|
|
|
718 |
$rhh_count->{$b}{'code'} <=>
|
|
|
719 |
$rhh_count->{$a}{'code'}
|
|
|
720 |
}
|
|
|
721 |
keys %{$rhh_count}) {
|
|
|
722 |
my ($factor, $scaled);
|
|
|
723 |
if ($BY_LANGUAGE or $BY_FILE) {
|
|
|
724 |
$factor = 1;
|
|
|
725 |
if ($BY_LANGUAGE) {
|
|
|
726 |
if (defined $rh_scale->{$lang_or_file}) {
|
|
|
727 |
$factor = $rh_scale->{$lang_or_file};
|
|
|
728 |
} else {
|
|
|
729 |
warn "No scale factor for $lang_or_file; using 1.00";
|
|
|
730 |
}
|
|
|
731 |
} else { # by individual code file
|
|
|
732 |
$factor = $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
|
|
|
733 |
}
|
|
|
734 |
$scaled = $factor*$rhh_count->{$lang_or_file}{'code'};
|
|
|
735 |
} else {
|
|
|
736 |
if (!defined $rhh_count->{$lang_or_file}{'scaled'}) {
|
|
|
737 |
$opt_no3 = 1;
|
|
|
738 |
# If we're summing together files previously generated
|
|
|
739 |
# with --no3 then rhh_count->{$lang_or_file}{'scaled'}
|
|
|
740 |
# this variable will be undefined. That should only
|
|
|
741 |
# happen when summing together by file however.
|
|
|
742 |
} elsif ($BY_LANGUAGE) {
|
|
|
743 |
warn "Missing scaled language info for $lang_or_file\n";
|
|
|
744 |
}
|
|
|
745 |
unless ($opt_no3) {
|
|
|
746 |
$scaled = $rhh_count->{$lang_or_file}{'scaled'};
|
|
|
747 |
$factor = $scaled/$rhh_count->{$lang_or_file}{'code'};
|
|
|
748 |
}
|
|
|
749 |
}
|
|
|
750 |
|
|
|
751 |
if ($BY_FILE) {
|
|
|
752 |
$data_line = sprintf $Format{'1'}{$Style}, $lang_or_file;
|
|
|
753 |
} else {
|
|
|
754 |
$data_line = sprintf $Format{'2'}{$Style}, $lang_or_file;
|
|
|
755 |
}
|
|
|
756 |
$data_line .= sprintf $Format{3}{$Style} ,
|
|
|
757 |
$rhh_count->{$lang_or_file}{'nFiles'} unless $BY_FILE;
|
|
|
758 |
$data_line .= sprintf $Format{4}{$Style} ,
|
|
|
759 |
$rhh_count->{$lang_or_file}{'blank'} ,
|
|
|
760 |
$rhh_count->{$lang_or_file}{'comment'},
|
|
|
761 |
$rhh_count->{$lang_or_file}{'code'} ;
|
|
|
762 |
$data_line .= sprintf $Format{5}{$Style} ,
|
|
|
763 |
$factor ,
|
|
|
764 |
$scaled unless $opt_no3;
|
|
|
765 |
$sum_scaled += $scaled unless $opt_no3;
|
|
|
766 |
|
|
|
767 |
if ($opt_xml) {
|
|
|
768 |
if (defined $rhh_count->{$lang_or_file}{'lang'}) {
|
|
|
769 |
my $lang = $rhh_count->{$lang_or_file}{'lang'};
|
|
|
770 |
if (!defined $languages->{$lang}) {
|
|
|
771 |
$languages->{$lang} = $lang;
|
|
|
772 |
}
|
|
|
773 |
$data_line.=' language="' . $lang . '" ';
|
|
|
774 |
}
|
|
|
775 |
if ($BY_FILE) {
|
|
|
776 |
push @results, "<file " . $data_line . "/>";
|
|
|
777 |
} else {
|
|
|
778 |
push @results, "<language " . $data_line . "/>";
|
|
|
779 |
}
|
|
|
780 |
} elsif ($opt_yaml) {
|
|
|
781 |
push @results, $lang_or_file . ":";
|
|
|
782 |
push @results, " nFiles: " .$rhh_count->{$lang_or_file}{'nFiles'} ;
|
|
|
783 |
push @results, " blank: " .$rhh_count->{$lang_or_file}{'blank'} ;
|
|
|
784 |
push @results, " comment: ".$rhh_count->{$lang_or_file}{'comment'};
|
|
|
785 |
push @results, " code: " .$rhh_count->{$lang_or_file}{'code'}
|
|
|
786 |
unless $BY_FILE;
|
|
|
787 |
if (!$opt_no3) {
|
|
|
788 |
push @results, " scaled: " . $scaled;
|
|
|
789 |
push @results, " factor: " . $factor;
|
|
|
790 |
}
|
|
|
791 |
} elsif ($opt_csv) {
|
|
|
792 |
my $extra_3 = "";
|
|
|
793 |
$extra_3 = ",$factor,$scaled" unless $opt_no3;
|
|
|
794 |
push @results, $lang_or_file . "," .
|
|
|
795 |
$rhh_count->{$lang_or_file}{'nFiles'} . "," .
|
|
|
796 |
$rhh_count->{$lang_or_file}{'blank'} . "," .
|
|
|
797 |
$rhh_count->{$lang_or_file}{'comment'}. "," .
|
|
|
798 |
$rhh_count->{$lang_or_file}{'code'} .
|
|
|
799 |
$extra_3;
|
|
|
800 |
} else {
|
|
|
801 |
push @results, $data_line;
|
|
|
802 |
}
|
|
|
803 |
}
|
|
|
804 |
my $avg_scale = 1; # weighted average of scale factors
|
|
|
805 |
$avg_scale = sprintf("%.2f", $sum_scaled / $sum_code)
|
|
|
806 |
if $sum_code and !$opt_no3;
|
|
|
807 |
|
|
|
808 |
if ($opt_xml) {
|
|
|
809 |
$data_line = "";
|
|
|
810 |
if (!$BY_FILE) {
|
|
|
811 |
$data_line .= sprintf "sum_files=\"%d\" ", $sum_files;
|
|
|
812 |
}
|
|
|
813 |
$data_line .= sprintf $Format{'4'}{$Style},
|
|
|
814 |
$sum_blank ,
|
|
|
815 |
$sum_comment ,
|
|
|
816 |
$sum_code ;
|
|
|
817 |
$data_line .= sprintf $Format{'5'}{$Style},
|
|
|
818 |
$avg_scale ,
|
|
|
819 |
$sum_scaled unless $opt_no3;
|
|
|
820 |
push @results, "<total " . $data_line . "/>";
|
|
|
821 |
push @results, "<languages>";
|
|
|
822 |
|
|
|
823 |
foreach my $language (keys %{$languages}) {
|
|
|
824 |
push @results, '<language name="' . $language . '"/>';
|
|
|
825 |
}
|
|
|
826 |
|
|
|
827 |
push @results, "</languages>";
|
|
|
828 |
push @results, "</results>";
|
|
|
829 |
} elsif ($opt_yaml) {
|
|
|
830 |
push @results, "SUM:";
|
|
|
831 |
push @results, " blank: " . $sum_blank ;
|
|
|
832 |
push @results, " code: " . $sum_code ;
|
|
|
833 |
push @results, " comment: ". $sum_comment;
|
|
|
834 |
push @results, " nFiles: " . $sum_files unless $BY_FILE;
|
|
|
835 |
if (!$opt_no3) {
|
|
|
836 |
push @results, " scaled: " . $sum_scaled;
|
|
|
837 |
push @results, " factor: " . $avg_scale ;
|
|
|
838 |
}
|
|
|
839 |
} elsif ($opt_csv) {
|
|
|
840 |
# do nothing
|
|
|
841 |
push @results, "SUMMARY" . "," .
|
|
|
842 |
$sum_files . "," .
|
|
|
843 |
$sum_blank . "," .
|
|
|
844 |
$sum_comment . "," .
|
|
|
845 |
$sum_code . "," .
|
|
|
846 |
scalar keys %Ignored;
|
|
|
847 |
|
|
|
848 |
|
|
|
849 |
} else {
|
|
|
850 |
|
|
|
851 |
if ($BY_FILE) {
|
|
|
852 |
$data_line = sprintf "%-23s ", "SUM:" ;
|
|
|
853 |
} else {
|
|
|
854 |
$data_line = sprintf "%-${spacing_1}s ", "SUM:" ;
|
|
|
855 |
$data_line .= sprintf "%${spacing_2}d ", $sum_files;
|
|
|
856 |
}
|
|
|
857 |
$data_line .= sprintf $Format{'4'}{$Style},
|
|
|
858 |
$sum_blank ,
|
|
|
859 |
$sum_comment ,
|
|
|
860 |
$sum_code ;
|
|
|
861 |
$data_line .= sprintf $Format{'5'}{$Style},
|
|
|
862 |
$avg_scale ,
|
|
|
863 |
$sum_scaled unless $opt_no3;
|
|
|
864 |
push @results, $hyphen_line if $sum_files > 1;
|
|
|
865 |
push @results, $data_line if $sum_files > 1;
|
|
|
866 |
push @results, $hyphen_line;
|
|
|
867 |
}
|
|
|
868 |
return @results;
|
|
|
869 |
} # 1}}}
|
|
|
870 |
sub print_errors { # {{{1
|
|
|
871 |
my ($rh_Error_Codes, # in
|
|
|
872 |
$raa_errors , # in
|
|
|
873 |
) = @_;
|
|
|
874 |
|
|
|
875 |
my %error_string = reverse(%{$rh_Error_Codes});
|
|
|
876 |
my $nErrors = scalar @{$raa_errors};
|
|
|
877 |
printf "\n%d error%s:\n", plural_form(scalar @Errors);
|
|
|
878 |
for (my $i = 0; $i < $nErrors; $i++) {
|
|
|
879 |
printf "%s: %s\n",
|
|
|
880 |
$error_string{ $raa_errors->[$i][0] },
|
|
|
881 |
$raa_errors->[$i][1] ;
|
|
|
882 |
}
|
|
|
883 |
print "\n";
|
|
|
884 |
|
|
|
885 |
} # 1}}}
|
|
|
886 |
sub write_lang_def { # {{{1
|
|
|
887 |
my ($file ,
|
|
|
888 |
$rh_Language_by_Extension , # in
|
|
|
889 |
$rh_Language_by_Script , # in
|
|
|
890 |
$rh_Language_by_File , # in
|
|
|
891 |
$rhaa_Filters_by_Language , # in
|
|
|
892 |
$rh_Not_Code_Extension , # in
|
|
|
893 |
$rh_Not_Code_Filename , # in
|
|
|
894 |
$rh_Scale_Factor , # in
|
|
|
895 |
) = @_;
|
|
|
896 |
|
|
|
897 |
my $OUT = new IO::File $file, "w";
|
|
|
898 |
die "Unable to write to $file\n" unless defined $OUT;
|
|
|
899 |
|
|
|
900 |
foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
|
|
|
901 |
next if $language eq "MATLAB/Objective C/MUMPS";
|
|
|
902 |
printf $OUT "%s\n", $language;
|
|
|
903 |
foreach my $filter (@{$rhaa_Filters_by_Language->{$language}}) {
|
|
|
904 |
printf $OUT " filter %s", $filter->[0];
|
|
|
905 |
printf $OUT " %s", $filter->[1] if defined $filter->[1];
|
|
|
906 |
print $OUT "\n";
|
|
|
907 |
}
|
|
|
908 |
foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
|
|
|
909 |
if ($language eq $rh_Language_by_Extension->{$ext}) {
|
|
|
910 |
printf $OUT " extension %s\n", $ext;
|
|
|
911 |
}
|
|
|
912 |
}
|
|
|
913 |
foreach my $filename (sort keys %{$rh_Language_by_File}) {
|
|
|
914 |
if ($language eq $rh_Language_by_File->{$filename}) {
|
|
|
915 |
printf $OUT " filename %s\n", $filename;
|
|
|
916 |
}
|
|
|
917 |
}
|
|
|
918 |
foreach my $script_exe (sort keys %{$rh_Language_by_Script}) {
|
|
|
919 |
if ($language eq $rh_Language_by_Script->{$script_exe}) {
|
|
|
920 |
printf $OUT " script_exe %s\n", $script_exe;
|
|
|
921 |
}
|
|
|
922 |
}
|
|
|
923 |
printf $OUT " 3rd_gen_scale %.2f\n", $rh_Scale_Factor->{$language};
|
|
|
924 |
}
|
|
|
925 |
|
|
|
926 |
$OUT->close;
|
|
|
927 |
} # 1}}}
|
|
|
928 |
sub read_lang_def { # {{{1
|
|
|
929 |
my ($file ,
|
|
|
930 |
$rh_Language_by_Extension , # out
|
|
|
931 |
$rh_Language_by_Script , # out
|
|
|
932 |
$rh_Language_by_File , # out
|
|
|
933 |
$rhaa_Filters_by_Language , # out
|
|
|
934 |
$rh_Not_Code_Extension , # out
|
|
|
935 |
$rh_Not_Code_Filename , # out
|
|
|
936 |
$rh_Scale_Factor , # out
|
|
|
937 |
) = @_;
|
|
|
938 |
|
|
|
939 |
my $IN = new IO::File $file, "r";
|
|
|
940 |
die "Unable to read $file.\n" unless defined $IN;
|
|
|
941 |
|
|
|
942 |
my $language = "";
|
|
|
943 |
while (<$IN>) {
|
|
|
944 |
next if /^\s*#/ or /^\s*$/;
|
|
|
945 |
|
|
|
946 |
if (/^(\w+.*?)\s*$/) {
|
|
|
947 |
$language = $1;
|
|
|
948 |
next;
|
|
|
949 |
}
|
|
|
950 |
die "Missing computer language name, line $. of $file\n"
|
|
|
951 |
unless $language;
|
|
|
952 |
|
|
|
953 |
if (/^ filter\s+(\w+)\s*$/) {
|
|
|
954 |
push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
|
|
|
955 |
|
|
|
956 |
} elsif (/^ filter\s+(\w+)\s+(.*?)\s*$/) {
|
|
|
957 |
push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
|
|
|
958 |
|
|
|
959 |
} elsif (/^ extension\s+(\S+)\s*$/) {
|
|
|
960 |
if (defined $rh_Language_by_Extension->{$1}) {
|
|
|
961 |
die "File extension collision: $1 ",
|
|
|
962 |
"maps to languages '$rh_Language_by_Extension->{$1}' ",
|
|
|
963 |
"and '$language'\n" ,
|
|
|
964 |
"Edit $file and remove $1 from one of these two ",
|
|
|
965 |
"language definitions.\n";
|
|
|
966 |
}
|
|
|
967 |
$rh_Language_by_Extension->{$1} = $language;
|
|
|
968 |
|
|
|
969 |
} elsif (/^ filename\s+(\S+)\s*$/) {
|
|
|
970 |
$rh_Language_by_File->{$1} = $language;
|
|
|
971 |
|
|
|
972 |
} elsif (/^ script_exe\s+(\S+)\s*$/) {
|
|
|
973 |
$rh_Language_by_Script->{$1} = $language;
|
|
|
974 |
|
|
|
975 |
} elsif (/^ 3rd_gen_scale\s+(\S+)\s*$/) {
|
|
|
976 |
$rh_Scale_Factor->{$language} = $1;
|
|
|
977 |
|
|
|
978 |
} else {
|
|
|
979 |
die "Unexpected data line $. of $file:\n$_\n";
|
|
|
980 |
}
|
|
|
981 |
|
|
|
982 |
}
|
|
|
983 |
$IN->close;
|
|
|
984 |
} # 1}}}
|
|
|
985 |
sub print_extension_info { # {{{1
|
|
|
986 |
my ($extension,) = @_;
|
|
|
987 |
if ($extension) { # show information on this extension
|
|
|
988 |
foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
|
|
|
989 |
# Language_by_Extension{f} = 'Fortran 77'
|
|
|
990 |
printf "%-12s -> %s\n", $ext, $Language_by_Extension{$ext}
|
|
|
991 |
if $ext =~ m{$extension}i;
|
|
|
992 |
}
|
|
|
993 |
} else { # show information on all extensions
|
|
|
994 |
foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
|
|
|
995 |
# Language_by_Extension{f} = 'Fortran 77'
|
|
|
996 |
printf "%-12s -> %s\n", $ext, $Language_by_Extension{$ext};
|
|
|
997 |
}
|
|
|
998 |
}
|
|
|
999 |
} # 1}}}
|
|
|
1000 |
sub print_language_info { # {{{1
|
|
|
1001 |
my ($language,) = @_;
|
|
|
1002 |
my %extensions = (); # the subset matched by the given $language value
|
|
|
1003 |
if ($language) { # show information on this language
|
|
|
1004 |
foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
|
|
|
1005 |
# Language_by_Extension{f} = 'Fortran 77'
|
|
|
1006 |
push @{$extensions{$Language_by_Extension{$ext}} }, $ext
|
|
|
1007 |
if $Language_by_Extension{$ext} =~ m{$language}i;
|
|
|
1008 |
}
|
|
|
1009 |
} else { # show information on all languages
|
|
|
1010 |
foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) {
|
|
|
1011 |
# Language_by_Extension{f} = 'Fortran 77'
|
|
|
1012 |
push @{$extensions{$Language_by_Extension{$ext}} }, $ext
|
|
|
1013 |
}
|
|
|
1014 |
}
|
|
|
1015 |
|
|
|
1016 |
# add exceptions (one file extension mapping to multiple languages)
|
|
|
1017 |
if (!$language or
|
|
|
1018 |
$language =~ /^(Objective C|MATLAB|MUMPS)$/i) {
|
|
|
1019 |
push @{$extensions{'Objective C'}}, "m";
|
|
|
1020 |
push @{$extensions{'MATLAB'}} , "m";
|
|
|
1021 |
push @{$extensions{'MUMPS'}} , "m";
|
|
|
1022 |
delete $extensions{'MATLAB/Objective C/MUMPS'};
|
|
|
1023 |
}
|
|
|
1024 |
|
|
|
1025 |
if (%extensions) {
|
|
|
1026 |
foreach my $lang (sort {lc $a cmp lc $b } keys %extensions) {
|
|
|
1027 |
printf "%-26s (%s)\n", $lang, join(", ", @{$extensions{$lang}});
|
|
|
1028 |
}
|
|
|
1029 |
}
|
|
|
1030 |
} # 1}}}
|
|
|
1031 |
sub make_file_list { # {{{1
|
|
|
1032 |
my ($ra_arg_list, # in file and/or directory names to examine
|
|
|
1033 |
$rh_Err , # in hash of error codes
|
|
|
1034 |
$raa_errors , # out errors encountered
|
|
|
1035 |
$rh_ignored , # out files not recognized as computer languages
|
|
|
1036 |
) = @_;
|
|
|
1037 |
|
|
|
1038 |
my ($fh, $filename);
|
|
|
1039 |
if ($opt_categorized) {
|
|
|
1040 |
$filename = $opt_categorized;
|
|
|
1041 |
$fh = new IO::File $filename, "+>"; # open for read/write
|
|
|
1042 |
die "Unable to write to $filename: $!\n" unless defined $fh;
|
|
|
1043 |
} elsif ($opt_sdir) {
|
|
|
1044 |
# write to the user-defined scratch directory
|
|
|
1045 |
$filename = $opt_sdir . '/cloc_file_list.txt';
|
|
|
1046 |
$fh = new IO::File $filename, "+>"; # open for read/write
|
|
|
1047 |
die "Unable to write to $filename: $!\n" unless defined $fh;
|
|
|
1048 |
} else {
|
|
|
1049 |
# let File::Temp create a suitable temporary file
|
|
|
1050 |
($fh, $filename) = tempfile(UNLINK => 1); # delete file on exit
|
|
|
1051 |
print "Using temp file list [$filename]\n" if $opt_v;
|
|
|
1052 |
}
|
|
|
1053 |
|
|
|
1054 |
my @dir_list = ();
|
|
|
1055 |
foreach my $file_or_dir (@{$ra_arg_list}) {
|
|
|
1056 |
#print "make_file_list file_or_dir=$file_or_dir\n";
|
|
|
1057 |
my $size_in_bytes = 0;
|
|
|
1058 |
if (!-r $file_or_dir) {
|
|
|
1059 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file_or_dir];
|
|
|
1060 |
next;
|
|
|
1061 |
}
|
|
|
1062 |
if (is_file($file_or_dir)) {
|
|
|
1063 |
if (!(-s $file_or_dir)) { # 0 sized file, named pipe, socket
|
|
|
1064 |
$rh_ignored->{$file_or_dir} = 'zero sized file';
|
|
|
1065 |
next;
|
|
|
1066 |
} elsif (-B $file_or_dir) { # avoid binary files
|
|
|
1067 |
$rh_ignored->{$file_or_dir} = 'binary file';
|
|
|
1068 |
next;
|
|
|
1069 |
}
|
|
|
1070 |
push @file_list, "$file_or_dir";
|
|
|
1071 |
} elsif (is_dir ($file_or_dir)) {
|
|
|
1072 |
push @dir_list, $file_or_dir;
|
|
|
1073 |
} else {
|
|
|
1074 |
push @{$raa_errors}, [$rh_Err->{'Neither file nor directory'} , $file_or_dir];
|
|
|
1075 |
$rh_ignored->{$file_or_dir} = 'not file, not directory';
|
|
|
1076 |
}
|
|
|
1077 |
}
|
|
|
1078 |
foreach my $dir (@dir_list) {
|
|
|
1079 |
#print "make_file_list dir=$dir\n";
|
|
|
1080 |
find(\&files, $dir); # populates global variable @file_list
|
|
|
1081 |
}
|
|
|
1082 |
$nFiles_Found = scalar @file_list;
|
|
|
1083 |
printf "%8d text file%s.\n", plural_form($nFiles_Found) unless $opt_quiet;
|
|
|
1084 |
write_file($opt_found, sort @file_list) if $opt_found;
|
|
|
1085 |
|
|
|
1086 |
my $nFiles_Categorized = 0;
|
|
|
1087 |
foreach my $file (@file_list) {
|
|
|
1088 |
printf "classifying $file\n" if $opt_v > 2;
|
|
|
1089 |
|
|
|
1090 |
my $basename = basename $file;
|
|
|
1091 |
if ($Not_Code_Filename{$basename}) {
|
|
|
1092 |
$rh_ignored->{$file} = "listed in " . '$' .
|
|
|
1093 |
"Not_Code_Filename{$basename}";
|
|
|
1094 |
next;
|
|
|
1095 |
} elsif ($basename =~ m{~$}) {
|
|
|
1096 |
$rh_ignored->{$file} = "temporary editor file";
|
|
|
1097 |
next;
|
|
|
1098 |
}
|
|
|
1099 |
|
|
|
1100 |
my $size_in_bytes = (stat $file)[7];
|
|
|
1101 |
my $language = classify_file($file ,
|
|
|
1102 |
$rh_Err ,
|
|
|
1103 |
$raa_errors,
|
|
|
1104 |
$rh_ignored);
|
|
|
1105 |
die "make_file_list($file) undef size" unless defined $size_in_bytes;
|
|
|
1106 |
die "make_file_list($file) undef lang" unless defined $language;
|
|
|
1107 |
printf $fh "%d,%s,%s\n", $size_in_bytes, $language, $file;
|
|
|
1108 |
++$nFiles_Categorized;
|
|
|
1109 |
printf "classified %d files\r",
|
|
|
1110 |
$nFiles_Categorized unless $nFiles_Categorized % $opt_progress_rate;
|
|
|
1111 |
}
|
|
|
1112 |
printf "classified %d files\r", $nFiles_Categorized
|
|
|
1113 |
if !$opt_quiet and $nFiles_Categorized > 1;
|
|
|
1114 |
|
|
|
1115 |
return $fh; # handle to the file containing the list of files to process
|
|
|
1116 |
} # 1}}}
|
|
|
1117 |
sub remove_duplicate_files { # {{{1
|
|
|
1118 |
my ($fh , # in
|
|
|
1119 |
$rh_Language , # out
|
|
|
1120 |
$rh_unique_source_file, # out
|
|
|
1121 |
$rh_Err , # in
|
|
|
1122 |
$raa_errors , # out errors encountered
|
|
|
1123 |
$rh_ignored , # out
|
|
|
1124 |
) = @_;
|
|
|
1125 |
|
|
|
1126 |
# Check for duplicate files by comparing file sizes.
|
|
|
1127 |
# Where files are equally sized, compare their MD5 checksums.
|
|
|
1128 |
|
|
|
1129 |
my $n = 0;
|
|
|
1130 |
my %files_by_size = (); # files_by_size{ # bytes } = [ list of files ]
|
|
|
1131 |
seek($fh, 0, 0); # rewind to beginning of the temp file
|
|
|
1132 |
while (<$fh>) {
|
|
|
1133 |
++$n;
|
|
|
1134 |
my ($size_in_bytes, $language, $file) = split(/,/, $_, 3);
|
|
|
1135 |
chomp($file);
|
|
|
1136 |
$rh_Language->{$file} = $language;
|
|
|
1137 |
push @{$files_by_size{$size_in_bytes}}, $file;
|
|
|
1138 |
}
|
|
|
1139 |
if ($n > $opt_progress_rate) {
|
|
|
1140 |
printf "Duplicate file check %d files (%d known unique)\r",
|
|
|
1141 |
$n, scalar keys %files_by_size;
|
|
|
1142 |
}
|
|
|
1143 |
$n = 0;
|
|
|
1144 |
foreach my $bytes (sort {$a <=> $b} keys %files_by_size) {
|
|
|
1145 |
++$n;
|
|
|
1146 |
printf "Unique: %8d files \r",
|
|
|
1147 |
$n unless $n % $opt_progress_rate;
|
|
|
1148 |
$rh_unique_source_file->{$files_by_size{$bytes}[0]} = 1;
|
|
|
1149 |
next unless scalar @{$files_by_size{$bytes}} > 1;
|
|
|
1150 |
foreach my $F (different_files(\@{$files_by_size{$bytes}},
|
|
|
1151 |
$rh_Err ,
|
|
|
1152 |
$raa_errors ,
|
|
|
1153 |
$rh_ignored ) ) {
|
|
|
1154 |
$rh_unique_source_file->{$F} = 1;
|
|
|
1155 |
}
|
|
|
1156 |
}
|
|
|
1157 |
} # 1}}}
|
|
|
1158 |
sub files { # {{{1
|
|
|
1159 |
# invoked by File::Find's find() Populates global variable @file_list
|
|
|
1160 |
if ($opt_exclude_dir) {
|
|
|
1161 |
my $return = 0;
|
|
|
1162 |
foreach my $skip_dir (keys %Exclude_Dir) {
|
|
|
1163 |
if ($File::Find::dir =~ m{/$skip_dir(/|$)} ) {
|
|
|
1164 |
$Ignored{$File::Find::name} = "--exclude_dir=$skip_dir";
|
|
|
1165 |
$return = 1;
|
|
|
1166 |
last;
|
|
|
1167 |
}
|
|
|
1168 |
}
|
|
|
1169 |
return if $return;
|
|
|
1170 |
}
|
|
|
1171 |
my $nBytes = -s $_ ;
|
|
|
1172 |
if (!$nBytes and $opt_v > 5) {
|
|
|
1173 |
printf "files(%s) zero size\n", $File::Find::name;
|
|
|
1174 |
}
|
|
|
1175 |
return unless $nBytes ; # attempting other tests w/pipe or socket will hang
|
|
|
1176 |
my $is_dir = is_dir($_);
|
|
|
1177 |
my $is_bin = -B $_ ;
|
|
|
1178 |
printf "files(%s) size=%d is_dir=%d -B=%d\n",
|
|
|
1179 |
$File::Find::name, $nBytes, $is_dir, $is_bin if $opt_v > 5;
|
|
|
1180 |
return if $is_dir or $is_bin;
|
|
|
1181 |
++$nFiles_Found;
|
|
|
1182 |
printf "%8d files\r", $nFiles_Found unless $nFiles_Found % $opt_progress_rate;
|
|
|
1183 |
push @file_list, $File::Find::name;
|
|
|
1184 |
} # 1}}}
|
|
|
1185 |
sub archive_files { # {{{1
|
|
|
1186 |
# invoked by File::Find's find() Populates global variable @binary_archive
|
|
|
1187 |
foreach my $ext (keys %Known_Binary_Archives) {
|
|
|
1188 |
push @binary_archive, $File::Find::name
|
|
|
1189 |
if $File::Find::name =~ m{$ext$};
|
|
|
1190 |
}
|
|
|
1191 |
} # 1}}}
|
|
|
1192 |
sub is_file { # {{{1
|
|
|
1193 |
# portable method to test if item is a file
|
|
|
1194 |
# (-f doesn't work in ActiveState Perl on Windows)
|
|
|
1195 |
my $item = shift @_;
|
|
|
1196 |
|
|
|
1197 |
if ($ON_WINDOWS) {
|
|
|
1198 |
my $mode = (stat $item)[2];
|
|
|
1199 |
$mode = 0 unless $mode;
|
|
|
1200 |
if ($mode & 0100000) { return 1; }
|
|
|
1201 |
else { return 0; }
|
|
|
1202 |
} else {
|
|
|
1203 |
return (-f $item); # works on Unix, Linux, CygWin, z/OS
|
|
|
1204 |
}
|
|
|
1205 |
} # 1}}}
|
|
|
1206 |
sub is_dir { # {{{1
|
|
|
1207 |
# portable method to test if item is a directory
|
|
|
1208 |
# (-d doesn't work in ActiveState Perl on Windows)
|
|
|
1209 |
my $item = shift @_;
|
|
|
1210 |
|
|
|
1211 |
if ($ON_WINDOWS) {
|
|
|
1212 |
my $mode = (stat $item)[2];
|
|
|
1213 |
$mode = 0 unless $mode;
|
|
|
1214 |
if ($mode & 0040000) { return 1; }
|
|
|
1215 |
else { return 0; }
|
|
|
1216 |
} else {
|
|
|
1217 |
return (-d $item); # works on Unix, Linux, CygWin, z/OS
|
|
|
1218 |
}
|
|
|
1219 |
} # 1}}}
|
|
|
1220 |
sub classify_file { # {{{1
|
|
|
1221 |
my ($full_file , # in
|
|
|
1222 |
$rh_Err , # in hash of error codes
|
|
|
1223 |
$raa_errors , # out
|
|
|
1224 |
$rh_ignored , # out
|
|
|
1225 |
) = @_;
|
|
|
1226 |
|
|
|
1227 |
print "-> classify_file($full_file)\n" if $opt_v > 2;
|
|
|
1228 |
my $language = "(unknown)";
|
|
|
1229 |
|
|
|
1230 |
my $look_at_first_line = 0;
|
|
|
1231 |
my $file = basename $full_file;
|
|
|
1232 |
return $language if $Not_Code_Filename{$file}; # (unknown)
|
|
|
1233 |
return $language if $file =~ m{~$}; # a temp edit file (unknown)
|
|
|
1234 |
|
|
|
1235 |
if ($file =~ /\.(\w+)$/) { # has an extension
|
|
|
1236 |
print "$full_file extension=[$1]\n" if $opt_v > 2;
|
|
|
1237 |
my $extension = $1;
|
|
|
1238 |
if ($Not_Code_Extension{$extension} and
|
|
|
1239 |
!$Forced_Extension{$extension}) {
|
|
|
1240 |
# If .1 (for example) is an extention that would ordinarily be
|
|
|
1241 |
# ignored but the user has insisted this be counted with the
|
|
|
1242 |
# --force-lang option, then go ahead and count it.
|
|
|
1243 |
$rh_ignored->{$full_file} =
|
|
|
1244 |
'listed in $Not_Code_Extension{' . $extension . '}';
|
|
|
1245 |
return $language;
|
|
|
1246 |
}
|
|
|
1247 |
if (defined $Language_by_Extension{$extension}) {
|
|
|
1248 |
if ($Language_by_Extension{$extension} eq
|
|
|
1249 |
'MATLAB/Objective C/MUMPS') {
|
|
|
1250 |
my $lang_M_or_O = "";
|
|
|
1251 |
matlab_or_objective_C($full_file ,
|
|
|
1252 |
$rh_Err ,
|
|
|
1253 |
$raa_errors,
|
|
|
1254 |
\$lang_M_or_O);
|
|
|
1255 |
if ($lang_M_or_O) {
|
|
|
1256 |
return $lang_M_or_O;
|
|
|
1257 |
} else { # an error happened in matlab_or_objective_C()
|
|
|
1258 |
$rh_ignored->{$full_file} =
|
|
|
1259 |
'failure in matlab_or_objective_C()';
|
|
|
1260 |
return $language; # (unknown)
|
|
|
1261 |
}
|
|
|
1262 |
} else {
|
|
|
1263 |
return $Language_by_Extension{$extension};
|
|
|
1264 |
}
|
|
|
1265 |
} else { # has an unmapped file extension
|
|
|
1266 |
$look_at_first_line = 1;
|
|
|
1267 |
}
|
|
|
1268 |
} elsif (defined $Language_by_File{lc $file}) {
|
|
|
1269 |
return $Language_by_File{lc $file};
|
|
|
1270 |
} else { # no file extension
|
|
|
1271 |
$look_at_first_line = 1;
|
|
|
1272 |
}
|
|
|
1273 |
|
|
|
1274 |
if ($look_at_first_line) {
|
|
|
1275 |
# maybe it is a shell/Perl/Python/Ruby/etc script that
|
|
|
1276 |
# starts with pound bang:
|
|
|
1277 |
# #!/usr/bin/perl
|
|
|
1278 |
# #!/usr/bin/env perl
|
|
|
1279 |
my $script_language = peek_at_first_line($full_file ,
|
|
|
1280 |
$rh_Err ,
|
|
|
1281 |
$raa_errors);
|
|
|
1282 |
if (!$script_language) {
|
|
|
1283 |
$rh_ignored->{$full_file} = "language unknown (#2)";
|
|
|
1284 |
# returns (unknown)
|
|
|
1285 |
}
|
|
|
1286 |
if (defined $Language_by_Script{$script_language}) {
|
|
|
1287 |
if (defined $Filters_by_Language{
|
|
|
1288 |
$Language_by_Script{$script_language}}) {
|
|
|
1289 |
$language = $Language_by_Script{$script_language};
|
|
|
1290 |
} else {
|
|
|
1291 |
$rh_ignored->{$full_file} =
|
|
|
1292 |
"undefined: Filters_by_Language{" .
|
|
|
1293 |
$Language_by_Script{$script_language} .
|
|
|
1294 |
"} for scripting language $script_language";
|
|
|
1295 |
# returns (unknown)
|
|
|
1296 |
}
|
|
|
1297 |
} else {
|
|
|
1298 |
$rh_ignored->{$full_file} = "language unknown (#3)";
|
|
|
1299 |
# returns (unknown)
|
|
|
1300 |
}
|
|
|
1301 |
}
|
|
|
1302 |
print "<- classify_file($full_file)\n" if $opt_v > 2;
|
|
|
1303 |
return $language;
|
|
|
1304 |
} # 1}}}
|
|
|
1305 |
sub peek_at_first_line { # {{{1
|
|
|
1306 |
my ($file , # in
|
|
|
1307 |
$rh_Err , # in hash of error codes
|
|
|
1308 |
$raa_errors , # out
|
|
|
1309 |
) = @_;
|
|
|
1310 |
|
|
|
1311 |
print "-> peek_at_first_line($file)\n" if $opt_v > 2;
|
|
|
1312 |
|
|
|
1313 |
my $script_language = "";
|
|
|
1314 |
if (!-r $file) {
|
|
|
1315 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
1316 |
return $script_language;
|
|
|
1317 |
}
|
|
|
1318 |
my $IN = new IO::File $file, "r";
|
|
|
1319 |
if (!defined $IN) {
|
|
|
1320 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
1321 |
print "<- peek_at_first_line($file)\n" if $opt_v > 2;
|
|
|
1322 |
return $script_language;
|
|
|
1323 |
}
|
|
|
1324 |
chomp(my $first_line = <$IN>);
|
|
|
1325 |
if (defined $first_line) {
|
|
|
1326 |
#print "peek_at_first_line of [$file] first_line=[$first_line]\n";
|
|
|
1327 |
if ($first_line =~ /^#\!\s*(\S.*?)$/) {
|
|
|
1328 |
#print "peek_at_first_line 1=[$1]\n";
|
|
|
1329 |
my @pound_bang = split(' ', $1);
|
|
|
1330 |
#print "peek_at_first_line basename 0=[", basename($pound_bang[0]), "]\n";
|
|
|
1331 |
if (basename($pound_bang[0]) eq "env" and
|
|
|
1332 |
scalar @pound_bang > 1) {
|
|
|
1333 |
$script_language = $pound_bang[1];
|
|
|
1334 |
#print "peek_at_first_line pound_bang A $pound_bang[1]\n";
|
|
|
1335 |
} else {
|
|
|
1336 |
$script_language = basename $pound_bang[0];
|
|
|
1337 |
#print "peek_at_first_line pound_bang B $script_language\n";
|
|
|
1338 |
}
|
|
|
1339 |
}
|
|
|
1340 |
}
|
|
|
1341 |
$IN->close;
|
|
|
1342 |
print "<- peek_at_first_line($file)\n" if $opt_v > 2;
|
|
|
1343 |
return $script_language;
|
|
|
1344 |
} # 1}}}
|
|
|
1345 |
sub different_files { # {{{1
|
|
|
1346 |
# See which of the given files are unique by computing each file's MD5
|
|
|
1347 |
# sum. Return the subset of files which are unique.
|
|
|
1348 |
my ($ra_files , # in
|
|
|
1349 |
$rh_Err , # in
|
|
|
1350 |
$raa_errors , # out
|
|
|
1351 |
$rh_ignored , # out
|
|
|
1352 |
) = @_;
|
|
|
1353 |
|
|
|
1354 |
print "-> different_files(@{$ra_files})\n" if $opt_v > 2;
|
|
|
1355 |
my %file_hash = (); # file_hash{ md5 hash } = file name
|
|
|
1356 |
foreach my $F (@{$ra_files}) {
|
|
|
1357 |
next if is_dir($F); # needed for Windows
|
|
|
1358 |
my $IN = new IO::File $F, "r";
|
|
|
1359 |
if (!defined $IN) {
|
|
|
1360 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
|
|
|
1361 |
$rh_ignored->{$F} = 'cannot read';
|
|
|
1362 |
} else {
|
|
|
1363 |
if ($HAVE_Digest_MD5) {
|
|
|
1364 |
binmode $IN;
|
|
|
1365 |
$file_hash{ Digest::MD5->new->addfile($IN)->hexdigest } = $F;
|
|
|
1366 |
} else {
|
|
|
1367 |
# all files treated unique
|
|
|
1368 |
$file_hash{ $F } = $F;
|
|
|
1369 |
}
|
|
|
1370 |
$IN->close;
|
|
|
1371 |
}
|
|
|
1372 |
}
|
|
|
1373 |
my @unique = values %file_hash;
|
|
|
1374 |
print "<- different_files(@unique)\n" if $opt_v > 2;
|
|
|
1375 |
return @unique;
|
|
|
1376 |
} # 1}}}
|
|
|
1377 |
sub call_counter { # {{{1
|
|
|
1378 |
my ($file , # in
|
|
|
1379 |
$language, # in
|
|
|
1380 |
) = @_;
|
|
|
1381 |
|
|
|
1382 |
# Logic: pass the file through the following filters:
|
|
|
1383 |
# 1. remove blank lines
|
|
|
1384 |
# 2. remove comments using each filter defined for this language
|
|
|
1385 |
# (example: SQL has two, remove_starts_with(--) and
|
|
|
1386 |
# remove_c_comments() )
|
|
|
1387 |
# 3. compute comment lines as
|
|
|
1388 |
# total lines - blank lines - lines left over after all
|
|
|
1389 |
# comment filters have been applied
|
|
|
1390 |
|
|
|
1391 |
print "-> call_counter($file, $language)\n" if $opt_v > 2;
|
|
|
1392 |
my @routines = @{$Filters_by_Language{$language}};
|
|
|
1393 |
#print "call_counter: ", Dumper(@routines), "\n";
|
|
|
1394 |
|
|
|
1395 |
my $IN = new IO::File $file, "r";
|
|
|
1396 |
my @lines = <$IN>;
|
|
|
1397 |
$IN->close;
|
|
|
1398 |
# Some files don't end with a new line. Force this:
|
|
|
1399 |
$lines[$#lines] .= "\n" unless $lines[$#lines] =~ m/\n$/;
|
|
|
1400 |
|
|
|
1401 |
my $total_lines = scalar @lines;
|
|
|
1402 |
|
|
|
1403 |
print_lines($file, "Original file:", \@lines) if $opt_print_filter_stages;
|
|
|
1404 |
if ($language eq "COBOL") {
|
|
|
1405 |
@lines = remove_cobol_blanks(\@lines);
|
|
|
1406 |
} else {
|
|
|
1407 |
@lines = remove_matches(\@lines, '^\s*$'); # removes blank lines
|
|
|
1408 |
}
|
|
|
1409 |
my $blank_lines = $total_lines - scalar @lines;
|
|
|
1410 |
print_lines($file, "Blank lines removed:", \@lines)
|
|
|
1411 |
if $opt_print_filter_stages;
|
|
|
1412 |
|
|
|
1413 |
foreach my $call_string (@routines) {
|
|
|
1414 |
#print "call_counter: call_string=", Dumper($call_string), "\n";
|
|
|
1415 |
my $subroutine = $call_string->[0];
|
|
|
1416 |
if (! defined &{$subroutine}) {
|
|
|
1417 |
warn "call_counter undefined subroutine $subroutine for $file\n";
|
|
|
1418 |
next;
|
|
|
1419 |
}
|
|
|
1420 |
print "call_counter file=$file sub=$subroutine\n" if $opt_v > 1;
|
|
|
1421 |
my @args = @{$call_string};
|
|
|
1422 |
shift @args; # drop the subroutine name
|
|
|
1423 |
if (@args and $args[0] eq '>filename<') {
|
|
|
1424 |
shift @args;
|
|
|
1425 |
unshift @args, $file;
|
|
|
1426 |
}
|
|
|
1427 |
|
|
|
1428 |
no strict 'refs';
|
|
|
1429 |
@lines = &{$subroutine}(\@lines, @args); # apply filter...
|
|
|
1430 |
|
|
|
1431 |
print_lines($file, "After $subroutine(@args)", \@lines)
|
|
|
1432 |
if $opt_print_filter_stages;
|
|
|
1433 |
@lines = remove_matches(\@lines, '^\s*$'); # ...then remove blank lines
|
|
|
1434 |
print_lines($file, "post $subroutine(@args) blanks cleanup:", \@lines)
|
|
|
1435 |
if $opt_print_filter_stages;
|
|
|
1436 |
}
|
|
|
1437 |
my $comment_lines = $total_lines - $blank_lines - scalar @lines;
|
|
|
1438 |
if ($opt_strip_comments) {
|
|
|
1439 |
my $stripped_file = basename $file . ".$opt_strip_comments";
|
|
|
1440 |
write_file($stripped_file, @lines);
|
|
|
1441 |
}
|
|
|
1442 |
|
|
|
1443 |
print "<- call_counter($total_lines, $blank_lines, $comment_lines)\n"
|
|
|
1444 |
if $opt_v > 2;
|
|
|
1445 |
return ($total_lines, $blank_lines, $comment_lines);
|
|
|
1446 |
} # 1}}}
|
|
|
1447 |
sub write_file { # {{{1
|
|
|
1448 |
my ($file , # in
|
|
|
1449 |
@lines , # in
|
|
|
1450 |
) = @_;
|
|
|
1451 |
|
|
|
1452 |
print "-> write_file($file)\n" if $opt_v > 2;
|
|
|
1453 |
|
|
|
1454 |
# Create the destination directory if it doesn't already exist.
|
|
|
1455 |
my $abs_file_path = File::Spec->rel2abs( $file );
|
|
|
1456 |
my ($volume, $directories, $filename) = File::Spec->splitpath( $abs_file_path );
|
|
|
1457 |
mkpath($volume . $directories, 1, 0777);
|
|
|
1458 |
|
|
|
1459 |
my $OUT = new IO::File $file, "w";
|
|
|
1460 |
if (defined $OUT) {
|
|
|
1461 |
chomp(@lines);
|
|
|
1462 |
print $OUT join("\n", @lines), "\n";
|
|
|
1463 |
$OUT->close;
|
|
|
1464 |
} else {
|
|
|
1465 |
warn "Unable to write to $file\n";
|
|
|
1466 |
}
|
|
|
1467 |
print "Wrote $file\n";
|
|
|
1468 |
|
|
|
1469 |
print "<- write_file\n" if $opt_v > 2;
|
|
|
1470 |
} # 1}}}
|
|
|
1471 |
sub remove_f77_comments { # {{{1
|
|
|
1472 |
my ($ra_lines, ) = @_;
|
|
|
1473 |
print "-> remove_f77_comments\n" if $opt_v > 2;
|
|
|
1474 |
|
|
|
1475 |
my @save_lines = ();
|
|
|
1476 |
foreach (@{$ra_lines}) {
|
|
|
1477 |
next if m{^[*cC]};
|
|
|
1478 |
push @save_lines, $_;
|
|
|
1479 |
}
|
|
|
1480 |
|
|
|
1481 |
print "<- remove_f77_comments\n" if $opt_v > 2;
|
|
|
1482 |
return @save_lines;
|
|
|
1483 |
} # 1}}}
|
|
|
1484 |
sub remove_f90_comments { # {{{1
|
|
|
1485 |
# derived from SLOCCount
|
|
|
1486 |
my ($ra_lines, ) = @_;
|
|
|
1487 |
print "-> remove_f90_comments\n" if $opt_v > 2;
|
|
|
1488 |
|
|
|
1489 |
my @save_lines = ();
|
|
|
1490 |
foreach (@{$ra_lines}) {
|
|
|
1491 |
# a comment is m/^\s*!/
|
|
|
1492 |
# an empty line is m/^\s*$/
|
|
|
1493 |
# a HPF statement is m/^\s*!hpf\$/i
|
|
|
1494 |
# an Open MP statement is m/^\s*!omp\$/i
|
|
|
1495 |
if (! m/^(\s*!|\s*$)/ || m/^\s*!(hpf|omp)\$/i) {
|
|
|
1496 |
push @save_lines, $_;
|
|
|
1497 |
}
|
|
|
1498 |
}
|
|
|
1499 |
|
|
|
1500 |
print "<- remove_f90_comments\n" if $opt_v > 2;
|
|
|
1501 |
return @save_lines;
|
|
|
1502 |
} # 1}}}
|
|
|
1503 |
sub remove_matches { # {{{1
|
|
|
1504 |
my ($ra_lines, # in
|
|
|
1505 |
$pattern , # in Perl regular expression (case insensitive)
|
|
|
1506 |
) = @_;
|
|
|
1507 |
print "-> remove_matches(pattern=$pattern)\n" if $opt_v > 2;
|
|
|
1508 |
|
|
|
1509 |
my @save_lines = ();
|
|
|
1510 |
foreach (@{$ra_lines}) {
|
|
|
1511 |
next if m{$pattern}i;
|
|
|
1512 |
push @save_lines, $_;
|
|
|
1513 |
}
|
|
|
1514 |
|
|
|
1515 |
print "<- remove_matches\n" if $opt_v > 2;
|
|
|
1516 |
return @save_lines;
|
|
|
1517 |
} # 1}}}
|
|
|
1518 |
sub remove_above { # {{{1
|
|
|
1519 |
my ($ra_lines, $marker, ) = @_;
|
|
|
1520 |
print "-> remove_above(marker=$marker)\n" if $opt_v > 2;
|
|
|
1521 |
|
|
|
1522 |
# Make two passes through the code:
|
|
|
1523 |
# 1. check if the marker exists
|
|
|
1524 |
# 2. remove anything above the marker if it exists,
|
|
|
1525 |
# do nothing if the marker does not exist
|
|
|
1526 |
|
|
|
1527 |
# Pass 1
|
|
|
1528 |
my $found_marker = 0;
|
|
|
1529 |
for (my $line_number = 1;
|
|
|
1530 |
$line_number <= scalar @{$ra_lines};
|
|
|
1531 |
$line_number++) {
|
|
|
1532 |
if ($ra_lines->[$line_number-1] =~ m{$marker}) {
|
|
|
1533 |
$found_marker = $line_number;
|
|
|
1534 |
last;
|
|
|
1535 |
}
|
|
|
1536 |
}
|
|
|
1537 |
|
|
|
1538 |
# Pass 2 only if needed
|
|
|
1539 |
my @save_lines = ();
|
|
|
1540 |
if ($found_marker) {
|
|
|
1541 |
my $n = 1;
|
|
|
1542 |
foreach (@{$ra_lines}) {
|
|
|
1543 |
push @save_lines, $_
|
|
|
1544 |
if $n >= $found_marker;
|
|
|
1545 |
++$n;
|
|
|
1546 |
}
|
|
|
1547 |
} else { # marker wasn't found; save all lines
|
|
|
1548 |
foreach (@{$ra_lines}) {
|
|
|
1549 |
push @save_lines, $_;
|
|
|
1550 |
}
|
|
|
1551 |
}
|
|
|
1552 |
|
|
|
1553 |
print "<- remove_above\n" if $opt_v > 2;
|
|
|
1554 |
return @save_lines;
|
|
|
1555 |
} # 1}}}
|
|
|
1556 |
sub remove_below { # {{{1
|
|
|
1557 |
my ($ra_lines, $marker, ) = @_;
|
|
|
1558 |
print "-> remove_below(marker=$marker)\n" if $opt_v > 2;
|
|
|
1559 |
|
|
|
1560 |
my @save_lines = ();
|
|
|
1561 |
foreach (@{$ra_lines}) {
|
|
|
1562 |
last if m{$marker};
|
|
|
1563 |
push @save_lines, $_;
|
|
|
1564 |
}
|
|
|
1565 |
|
|
|
1566 |
print "<- remove_below\n" if $opt_v > 2;
|
|
|
1567 |
return @save_lines;
|
|
|
1568 |
} # 1}}}
|
|
|
1569 |
sub remove_between { # {{{1
|
|
|
1570 |
my ($ra_lines, $marker, ) = @_;
|
|
|
1571 |
# $marker must contain one of the balanced pairs understood
|
|
|
1572 |
# by Regexp::Common::balanced, namely
|
|
|
1573 |
# '{}' '()' '[]' or '<>'
|
|
|
1574 |
|
|
|
1575 |
print "-> remove_between(marker=$marker)\n" if $opt_v > 2;
|
|
|
1576 |
my %acceptable = ('{}'=>1, '()'=>1, '[]'=>1, '<>'=>1, );
|
|
|
1577 |
die "remove_between: invalid delimiter '$marker'\n",
|
|
|
1578 |
"the delimiter must be one of these four pairs:\n",
|
|
|
1579 |
"{} () [] <>\n" unless
|
|
|
1580 |
$acceptable{$marker};
|
|
|
1581 |
|
|
|
1582 |
Install_Regexp_Common() unless $HAVE_Rexexp_Common;
|
|
|
1583 |
|
|
|
1584 |
my $all_lines = join("", @{$ra_lines});
|
|
|
1585 |
|
|
|
1586 |
no strict 'vars';
|
|
|
1587 |
# otherwise get:
|
|
|
1588 |
# Global symbol "%RE" requires explicit package name at cloc line xx.
|
|
|
1589 |
if ($all_lines =~ m/$RE{balanced}{-parens => $marker}/) {
|
|
|
1590 |
no warnings;
|
|
|
1591 |
$all_lines =~ s/$1//g;
|
|
|
1592 |
}
|
|
|
1593 |
|
|
|
1594 |
print "<- remove_between\n" if $opt_v > 2;
|
|
|
1595 |
return split("\n", $all_lines);
|
|
|
1596 |
} # 1}}}
|
|
|
1597 |
sub remove_cobol_blanks { # {{{1
|
|
|
1598 |
# subroutines derived from SLOCCount
|
|
|
1599 |
my ($ra_lines, ) = @_;
|
|
|
1600 |
|
|
|
1601 |
my $free_format = 0; # Support "free format" source code.
|
|
|
1602 |
my @save_lines = ();
|
|
|
1603 |
|
|
|
1604 |
foreach (@{$ra_lines}) {
|
|
|
1605 |
next if m/^\s*$/;
|
|
|
1606 |
my $line = expand($_); # convert tabs to equivalent spaces
|
|
|
1607 |
$free_format = 1 if $line =~ m/^......\$.*SET.*SOURCEFORMAT.*FREE/i;
|
|
|
1608 |
if ($free_format) {
|
|
|
1609 |
push @save_lines, $_;
|
|
|
1610 |
} else {
|
|
|
1611 |
push @save_lines, $_ unless m/^\d{6}\s*$/ or
|
|
|
1612 |
($line =~ m/^\d{6}\s{66}/);
|
|
|
1613 |
}
|
|
|
1614 |
}
|
|
|
1615 |
return @save_lines;
|
|
|
1616 |
} # 1}}}
|
|
|
1617 |
sub remove_cobol_comments { # {{{1
|
|
|
1618 |
# subroutines derived from SLOCCount
|
|
|
1619 |
my ($ra_lines, ) = @_;
|
|
|
1620 |
|
|
|
1621 |
my $free_format = 0; # Support "free format" source code.
|
|
|
1622 |
my @save_lines = ();
|
|
|
1623 |
|
|
|
1624 |
foreach (@{$ra_lines}) {
|
|
|
1625 |
if (m/^......\$.*SET.*SOURCEFORMAT.*FREE/i) {$free_format = 1;}
|
|
|
1626 |
if ($free_format) {
|
|
|
1627 |
push @save_lines, $_ unless m{^\s*\*};
|
|
|
1628 |
} else {
|
|
|
1629 |
push @save_lines, $_ unless m{^......\*} or m{^\*};
|
|
|
1630 |
}
|
|
|
1631 |
}
|
|
|
1632 |
return @save_lines;
|
|
|
1633 |
} # 1}}}
|
|
|
1634 |
sub remove_jcl_comments { # {{{1
|
|
|
1635 |
my ($ra_lines, ) = @_;
|
|
|
1636 |
|
|
|
1637 |
print "-> remove_jcl_comments\n" if $opt_v > 2;
|
|
|
1638 |
|
|
|
1639 |
my @save_lines = ();
|
|
|
1640 |
my $in_comment = 0;
|
|
|
1641 |
foreach (@{$ra_lines}) {
|
|
|
1642 |
next if /^\s*$/;
|
|
|
1643 |
next if m{^\s*//\*};
|
|
|
1644 |
last if m{^\s*//\s*$};
|
|
|
1645 |
push @save_lines, $_;
|
|
|
1646 |
}
|
|
|
1647 |
|
|
|
1648 |
print "<- remove_jcl_comments\n" if $opt_v > 2;
|
|
|
1649 |
return @save_lines;
|
|
|
1650 |
} # 1}}}
|
|
|
1651 |
sub remove_jsp_comments { # {{{1
|
|
|
1652 |
# JSP comment is <%-- body of comment --%>
|
|
|
1653 |
my ($ra_lines, ) = @_;
|
|
|
1654 |
|
|
|
1655 |
print "-> remove_jsp_comments\n" if $opt_v > 2;
|
|
|
1656 |
|
|
|
1657 |
my @save_lines = ();
|
|
|
1658 |
my $in_comment = 0;
|
|
|
1659 |
foreach (@{$ra_lines}) {
|
|
|
1660 |
|
|
|
1661 |
next if /^\s*$/;
|
|
|
1662 |
s/<\%\-\-.*?\-\-\%>//g; # strip one-line comments
|
|
|
1663 |
next if /^\s*$/;
|
|
|
1664 |
if ($in_comment) {
|
|
|
1665 |
if (/\-\-\%>/) {
|
|
|
1666 |
s/^.*?\-\-\%>//;
|
|
|
1667 |
$in_comment = 0;
|
|
|
1668 |
}
|
|
|
1669 |
}
|
|
|
1670 |
next if /^\s*$/;
|
|
|
1671 |
$in_comment = 1 if /^(.*?)<\%\-\-/;
|
|
|
1672 |
next if defined $1 and $1 =~ /^\s*$/;
|
|
|
1673 |
next if ($in_comment);
|
|
|
1674 |
push @save_lines, $_;
|
|
|
1675 |
}
|
|
|
1676 |
|
|
|
1677 |
print "<- remove_jsp_comments\n" if $opt_v > 2;
|
|
|
1678 |
return @save_lines;
|
|
|
1679 |
} # 1}}}
|
|
|
1680 |
sub remove_html_comments { # {{{1
|
|
|
1681 |
# HTML comment is <!-- body of comment -->
|
|
|
1682 |
# Need to use my own routine until the HTML comment regex in
|
|
|
1683 |
# the Regexp::Common module can handle <!-- -- -->
|
|
|
1684 |
my ($ra_lines, ) = @_;
|
|
|
1685 |
|
|
|
1686 |
print "-> remove_html_comments\n" if $opt_v > 2;
|
|
|
1687 |
|
|
|
1688 |
my @save_lines = ();
|
|
|
1689 |
my $in_comment = 0;
|
|
|
1690 |
foreach (@{$ra_lines}) {
|
|
|
1691 |
|
|
|
1692 |
next if /^\s*$/;
|
|
|
1693 |
s/<!\-\-.*?\-\->//g; # strip one-line comments
|
|
|
1694 |
next if /^\s*$/;
|
|
|
1695 |
if ($in_comment) {
|
|
|
1696 |
if (/\-\->/) {
|
|
|
1697 |
s/^.*?\-\->//;
|
|
|
1698 |
$in_comment = 0;
|
|
|
1699 |
}
|
|
|
1700 |
}
|
|
|
1701 |
next if /^\s*$/;
|
|
|
1702 |
$in_comment = 1 if /^(.*?)<!\-\-/;
|
|
|
1703 |
next if defined $1 and $1 =~ /^\s*$/;
|
|
|
1704 |
next if ($in_comment);
|
|
|
1705 |
push @save_lines, $_;
|
|
|
1706 |
}
|
|
|
1707 |
|
|
|
1708 |
print "<- remove_html_comments\n" if $opt_v > 2;
|
|
|
1709 |
return @save_lines;
|
|
|
1710 |
} # 1}}}
|
|
|
1711 |
sub determine_lit_type { # {{{1
|
|
|
1712 |
my ($file) = @_;
|
|
|
1713 |
|
|
|
1714 |
open (FILE, $file);
|
|
|
1715 |
while (<FILE>) {
|
|
|
1716 |
if (m/^\\begin{code}/) { close FILE; return 2; }
|
|
|
1717 |
if (m/^>\s/) { close FILE; return 1; }
|
|
|
1718 |
}
|
|
|
1719 |
|
|
|
1720 |
return 0;
|
|
|
1721 |
} # 1}}}
|
|
|
1722 |
sub remove_haskell_comments { # {{{1
|
|
|
1723 |
# Bulk of code taken from SLOCCount's haskell_count script.
|
|
|
1724 |
# Strips out {- .. -} and -- comments and counts the rest.
|
|
|
1725 |
# Pragmas, {-#...}, are counted as SLOC.
|
|
|
1726 |
# BUG: Doesn't handle strings with embedded block comment markers gracefully.
|
|
|
1727 |
# In practice, that shouldn't be a problem.
|
|
|
1728 |
my ($ra_lines, $file, ) = @_;
|
|
|
1729 |
|
|
|
1730 |
print "-> remove_haskell_comments\n" if $opt_v > 2;
|
|
|
1731 |
|
|
|
1732 |
my @save_lines = ();
|
|
|
1733 |
my $in_comment = 0;
|
|
|
1734 |
my $incomment = 0;
|
|
|
1735 |
my ($literate, $inlitblock) = (0,0);
|
|
|
1736 |
|
|
|
1737 |
$literate = 1 if $file =~ /\.lhs$/;
|
|
|
1738 |
if($literate) { $literate = determine_lit_type($file) }
|
|
|
1739 |
|
|
|
1740 |
foreach (@{$ra_lines}) {
|
|
|
1741 |
if ($literate == 1) {
|
|
|
1742 |
if (!s/^>//) { s/.*//; }
|
|
|
1743 |
} elsif ($literate == 2) {
|
|
|
1744 |
if ($inlitblock) {
|
|
|
1745 |
if (m/^\\end{code}/) { s/.*//; $inlitblock = 0; }
|
|
|
1746 |
} elsif (!$inlitblock) {
|
|
|
1747 |
if (m/^\\begin{code}/) { s/.*//; $inlitblock = 1; }
|
|
|
1748 |
else { s/.*//; }
|
|
|
1749 |
}
|
|
|
1750 |
}
|
|
|
1751 |
|
|
|
1752 |
if ($incomment) {
|
|
|
1753 |
if (m/\-\}/) { s/^.*?\-\}//; $incomment = 0;}
|
|
|
1754 |
else { s/.*//; }
|
|
|
1755 |
}
|
|
|
1756 |
if (!$incomment) {
|
|
|
1757 |
s/--.*//;
|
|
|
1758 |
s!{-[^#].*?-}!!g;
|
|
|
1759 |
if (m/{-/ && (!m/{-#/)) {
|
|
|
1760 |
s/{-.*//;
|
|
|
1761 |
$incomment = 1;
|
|
|
1762 |
}
|
|
|
1763 |
}
|
|
|
1764 |
if (m/\S/) { push @save_lines, $_; }
|
|
|
1765 |
}
|
|
|
1766 |
# if ($incomment) {print "ERROR: ended in comment in $ARGV\n";}
|
|
|
1767 |
|
|
|
1768 |
print "<- remove_haskell_comments\n" if $opt_v > 2;
|
|
|
1769 |
return @save_lines;
|
|
|
1770 |
} # 1}}}
|
|
|
1771 |
sub print_lines { # {{{1
|
|
|
1772 |
my ($file , # in
|
|
|
1773 |
$title , # in
|
|
|
1774 |
$ra_lines , # in
|
|
|
1775 |
) = @_;
|
|
|
1776 |
printf "->%-30s %s\n", $file, $title;
|
|
|
1777 |
for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
|
|
|
1778 |
printf "%5d | %s", $i+1, $ra_lines->[$i];
|
|
|
1779 |
print "\n" unless $ra_lines->[$i] =~ m{\n$}
|
|
|
1780 |
}
|
|
|
1781 |
} # 1}}}
|
|
|
1782 |
sub set_constants { # {{{1
|
|
|
1783 |
my ($rh_Language_by_Extension , # out
|
|
|
1784 |
$rh_Language_by_Script , # out
|
|
|
1785 |
$rh_Language_by_File , # out
|
|
|
1786 |
$rhaa_Filters_by_Language , # out
|
|
|
1787 |
$rh_Not_Code_Extension , # out
|
|
|
1788 |
$rh_Not_Code_Filename , # out
|
|
|
1789 |
$rh_Scale_Factor , # out
|
|
|
1790 |
$rh_Known_Binary_Archives , # out
|
|
|
1791 |
) = @_;
|
|
|
1792 |
# 1}}}
|
|
|
1793 |
%{$rh_Language_by_Extension} = ( # {{{1
|
|
|
1794 |
'abap' => 'ABAP' ,
|
|
|
1795 |
'ac' => 'm4' ,
|
|
|
1796 |
'ada' => 'Ada' ,
|
|
|
1797 |
'adb' => 'Ada' ,
|
|
|
1798 |
'ads' => 'Ada' ,
|
|
|
1799 |
'adso' => 'ADSO/IDSM' ,
|
|
|
1800 |
'am' => 'make' ,
|
|
|
1801 |
'asa' => 'ASP' ,
|
|
|
1802 |
'asax' => 'ASP.Net' ,
|
|
|
1803 |
'ascx' => 'ASP.Net' ,
|
|
|
1804 |
'asm' => 'Assembler' ,
|
|
|
1805 |
'asmx' => 'ASP.Net' ,
|
|
|
1806 |
'asp' => 'ASP' ,
|
|
|
1807 |
'aspx' => 'ASP.Net' ,
|
|
|
1808 |
'master' => 'ASP.Net' ,
|
|
|
1809 |
'sitemap' => 'ASP.Net' ,
|
|
|
1810 |
'awk' => 'awk' ,
|
|
|
1811 |
'bash' => 'Bourne Again Shell' ,
|
|
|
1812 |
'bas' => 'Visual Basic' ,
|
|
|
1813 |
'bat' => 'DOS Batch' ,
|
|
|
1814 |
'BAT' => 'DOS Batch' ,
|
|
|
1815 |
'cbl' => 'COBOL' ,
|
|
|
1816 |
'CBL' => 'COBOL' ,
|
|
|
1817 |
'c' => 'C' ,
|
|
|
1818 |
'C' => 'C++' ,
|
|
|
1819 |
'cc' => 'C++' ,
|
|
|
1820 |
'ccs' => 'CCS' ,
|
|
|
1821 |
'cfm' => 'ColdFusion' ,
|
|
|
1822 |
'cl' => 'Lisp' ,
|
|
|
1823 |
'cls' => 'Visual Basic' ,
|
|
|
1824 |
'cob' => 'COBOL' ,
|
|
|
1825 |
'COB' => 'COBOL' ,
|
|
|
1826 |
'config' => 'ASP.Net' ,
|
|
|
1827 |
'cpp' => 'C++' ,
|
|
|
1828 |
'cs' => 'C#' ,
|
|
|
1829 |
'csh' => 'C Shell' ,
|
|
|
1830 |
'css' => "CSS" ,
|
|
|
1831 |
'cxx' => 'C++' ,
|
|
|
1832 |
'da' => 'DAL' ,
|
|
|
1833 |
'def' => 'Teamcenter def' ,
|
|
|
1834 |
'dmap' => 'NASTRAN DMAP' ,
|
|
|
1835 |
'dpr' => 'Pascal' ,
|
|
|
1836 |
'dtd' => 'DTD' ,
|
|
|
1837 |
'ec' => 'C' ,
|
|
|
1838 |
'el' => 'Lisp' ,
|
|
|
1839 |
'exp' => 'Expect' ,
|
|
|
1840 |
'f77' => 'Fortran 77' ,
|
|
|
1841 |
'F77' => 'Fortran 77' ,
|
|
|
1842 |
'f90' => 'Fortran 90' ,
|
|
|
1843 |
'F90' => 'Fortran 90' ,
|
|
|
1844 |
'f95' => 'Fortran 95' ,
|
|
|
1845 |
'F95' => 'Fortran 95' ,
|
|
|
1846 |
'f' => 'Fortran 77' ,
|
|
|
1847 |
'F' => 'Fortran 77' ,
|
|
|
1848 |
'fmt' => 'Oracle Forms' ,
|
|
|
1849 |
'focexec' => 'Focus' ,
|
|
|
1850 |
'frm' => 'Visual Basic' ,
|
|
|
1851 |
'gnumakefile' => 'make' ,
|
|
|
1852 |
'Gnumakefile' => 'make' ,
|
|
|
1853 |
'h' => 'C/C++ Header' ,
|
|
|
1854 |
'H' => 'C/C++ Header' ,
|
|
|
1855 |
'hh' => 'C/C++ Header' ,
|
|
|
1856 |
'hpp' => 'C/C++ Header' ,
|
|
|
1857 |
'hs' => 'Haskell' ,
|
|
|
1858 |
'htm' => 'HTML' ,
|
|
|
1859 |
'html' => 'HTML' ,
|
|
|
1860 |
'i3' => 'Modula3' ,
|
|
|
1861 |
'idl' => 'IDL' ,
|
|
|
1862 |
'ig' => 'Modula3' ,
|
|
|
1863 |
'inc' => 'inc' , # might be PHP
|
|
|
1864 |
'itk' => 'Tcl/Tk' ,
|
|
|
1865 |
'java' => 'Java' ,
|
|
|
1866 |
'jcl' => 'JCL' , # IBM Job Control Lang.
|
|
|
1867 |
'jl' => 'Lisp' ,
|
|
|
1868 |
'js' => 'Javascript' ,
|
|
|
1869 |
'jsp' => 'JSP' , # Java server pages
|
|
|
1870 |
'ksh' => 'Korn Shell' ,
|
|
|
1871 |
'lhs' => 'Haskell' ,
|
|
|
1872 |
'l' => 'lex' ,
|
|
|
1873 |
'lsp' => 'Lisp' ,
|
|
|
1874 |
'lua' => 'Lua' ,
|
|
|
1875 |
'm3' => 'Modula3' ,
|
|
|
1876 |
'm4' => 'm4' ,
|
|
|
1877 |
'makefile' => 'make' ,
|
|
|
1878 |
'Makefile' => 'make' ,
|
|
|
1879 |
'met' => 'Teamcenter met' ,
|
|
|
1880 |
'mg' => 'Modula3' ,
|
|
|
1881 |
'mli' => 'ML' ,
|
|
|
1882 |
'ml' => 'ML' ,
|
|
|
1883 |
'm' => 'MATLAB/Objective C/MUMPS' ,
|
|
|
1884 |
'wdproj' => 'MSBuild scripts' ,
|
|
|
1885 |
'csproj' => 'MSBuild scripts' ,
|
|
|
1886 |
'mps' => 'MUMPS' ,
|
|
|
1887 |
'mth' => 'Teamcenter mth' ,
|
|
|
1888 |
'oscript' => 'LiveLink OScript' ,
|
|
|
1889 |
'pad' => 'Ada' , # Oracle Ada preprocessor
|
|
|
1890 |
'pas' => 'Pascal' ,
|
|
|
1891 |
'pcc' => 'C++' , # Oracle C++ preprocessor
|
|
|
1892 |
'perl' => 'Perl' ,
|
|
|
1893 |
'pfo' => 'Fortran 77' ,
|
|
|
1894 |
'pgc' => 'C' , # Postgres embedded C/C++
|
|
|
1895 |
'php3' => 'PHP' ,
|
|
|
1896 |
'php4' => 'PHP' ,
|
|
|
1897 |
'php5' => 'PHP' ,
|
|
|
1898 |
'php' => 'PHP' ,
|
|
|
1899 |
'plh' => 'Perl' ,
|
|
|
1900 |
'pl' => 'Perl' ,
|
|
|
1901 |
'PL' => 'Perl' ,
|
|
|
1902 |
'plx' => 'Perl' ,
|
|
|
1903 |
'pm' => 'Perl' ,
|
|
|
1904 |
'p' => 'Pascal' ,
|
|
|
1905 |
'pp' => 'Pascal' ,
|
|
|
1906 |
'psql' => 'SQL' ,
|
|
|
1907 |
'py' => 'Python' ,
|
|
|
1908 |
'rb' => 'Ruby' ,
|
|
|
1909 |
# 'resx' => 'ASP.Net' ,
|
|
|
1910 |
'rex' => 'Oracle Reports' ,
|
|
|
1911 |
'rexx' => 'Rexx' ,
|
|
|
1912 |
's' => 'Assembler' ,
|
|
|
1913 |
'S' => 'Assembler' ,
|
|
|
1914 |
'sbl' => 'Softbridge Basic' ,
|
|
|
1915 |
'SBL' => 'Softbridge Basic' ,
|
|
|
1916 |
'sc' => 'Lisp' ,
|
|
|
1917 |
'scm' => 'Lisp' ,
|
|
|
1918 |
'sed' => 'sed' ,
|
|
|
1919 |
'ses' => 'Patran Command Language' ,
|
|
|
1920 |
'sh' => 'Bourne Shell' ,
|
|
|
1921 |
'sql' => 'SQL' ,
|
|
|
1922 |
'tcl' => 'Tcl/Tk' ,
|
|
|
1923 |
'tcsh' => 'C Shell' ,
|
|
|
1924 |
'tk' => 'Tcl/Tk' ,
|
|
|
1925 |
'vba' => 'Visual Basic' ,
|
|
|
1926 |
# 'vbp' => 'Visual Basic' , # .vbp - autogenerated
|
|
|
1927 |
'vb' => 'Visual Basic' ,
|
|
|
1928 |
# 'vbw' => 'Visual Basic' , # .vbw - autogenerated
|
|
|
1929 |
'vbs' => 'Visual Basic' ,
|
|
|
1930 |
'webinfo' => 'ASP.Net' ,
|
|
|
1931 |
'xml' => 'XML' ,
|
|
|
1932 |
'build' => 'NAnt scripts' ,
|
|
|
1933 |
'vim' => 'vim script' ,
|
|
|
1934 |
'xsd' => 'XSD' ,
|
|
|
1935 |
'xslt' => 'XSLT' ,
|
|
|
1936 |
'xsl' => 'XSLT' ,
|
|
|
1937 |
'y' => 'yacc' ,
|
|
|
1938 |
'yaml' => 'YAML' ,
|
|
|
1939 |
);
|
|
|
1940 |
# 1}}}
|
|
|
1941 |
%{$rh_Language_by_Script} = ( # {{{1
|
|
|
1942 |
'awk' => 'awk' ,
|
|
|
1943 |
'bash' => 'Bourne Again Shell' ,
|
|
|
1944 |
'bc' => 'bc' ,# calculator
|
|
|
1945 |
'csh' => 'C Shell' ,
|
|
|
1946 |
'idl' => 'IDL' ,
|
|
|
1947 |
'ksh' => 'Korn Shell' ,
|
|
|
1948 |
'make' => 'make' ,
|
|
|
1949 |
'octave' => 'Octave' ,
|
|
|
1950 |
'perl5' => 'Perl' ,
|
|
|
1951 |
'perl' => 'Perl' ,
|
|
|
1952 |
'ruby' => 'Ruby' ,
|
|
|
1953 |
'sed' => 'sed' ,
|
|
|
1954 |
'sh' => 'Bourne Shell' ,
|
|
|
1955 |
'tcl' => 'Tcl/Tk' ,
|
|
|
1956 |
'tcsh' => 'C Shell' ,
|
|
|
1957 |
'wish' => 'Tcl/Tk' ,
|
|
|
1958 |
);
|
|
|
1959 |
# 1}}}
|
|
|
1960 |
%{$rh_Language_by_File} = ( # {{{1
|
|
|
1961 |
'Makefile' => 'make' ,
|
|
|
1962 |
'makefile' => 'make' ,
|
|
|
1963 |
'gnumakefile' => 'make' ,
|
|
|
1964 |
'Gnumakefile' => 'make' ,
|
|
|
1965 |
);
|
|
|
1966 |
# 1}}}
|
|
|
1967 |
%{$rhaa_Filters_by_Language} = ( # {{{1
|
|
|
1968 |
'ABAP' => [ [ 'remove_matches' , '^\*' ], ],
|
|
|
1969 |
'ASP' => [ [ 'remove_matches' , '^\s*\47'], ], # \47 = '
|
|
|
1970 |
'ASP.Net' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
1971 |
'Ada' => [ [ 'remove_matches' , '^\s*--' ], ],
|
|
|
1972 |
'ADSO/IDSM' => [ [ 'remove_matches' , '^\s*\*[\+\!]' ], ],
|
|
|
1973 |
'Assembler' => [
|
|
|
1974 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
1975 |
[ 'remove_matches' , '^\s*;' ],
|
|
|
1976 |
[ 'call_regexp_common' , 'C' ],
|
|
|
1977 |
],
|
|
|
1978 |
'awk' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
1979 |
'bc' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
1980 |
'C' => [
|
|
|
1981 |
[ 'remove_matches' , '^\s*//' ], # C99
|
|
|
1982 |
[ 'call_regexp_common' , 'C' ], ],
|
|
|
1983 |
'C++' => [
|
|
|
1984 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
1985 |
[ 'call_regexp_common' , 'C' ],
|
|
|
1986 |
],
|
|
|
1987 |
'C/C++ Header' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
1988 |
'C#' => [
|
|
|
1989 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
1990 |
[ 'call_regexp_common' , 'C' ],
|
|
|
1991 |
],
|
|
|
1992 |
'CCS' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
1993 |
'CSS' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
1994 |
'COBOL' => [ [ 'remove_cobol_comments', ], ],
|
|
|
1995 |
'ColdFusion' => [ [ 'remove_html_comments', ],
|
|
|
1996 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
1997 |
'Crystal Reports' => [ [ 'remove_matches' , '^\s*//' ], ],
|
|
|
1998 |
'DAL' => [ [ 'remove_between' , '[]', ], ],
|
|
|
1999 |
'NASTRAN DMAP' => [ [ 'remove_matches' , '^\s*\$' ], ],
|
|
|
2000 |
'DOS Batch' => [ [ 'remove_matches' , '^\s*rem', ], ],
|
|
|
2001 |
'DTD' => [ [ 'remove_html_comments', ],
|
|
|
2002 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
2003 |
'Expect' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2004 |
'Focus' => [ [ 'remove_matches' , '^\s*\-\*' ], ],
|
|
|
2005 |
'Fortran 77' => [ [ 'remove_f77_comments' , ], ],
|
|
|
2006 |
'Fortran 90' => [ [ 'remove_f77_comments' , ],
|
|
|
2007 |
[ 'remove_f90_comments' , ], ],
|
|
|
2008 |
'Fortran 95' => [ [ 'remove_f77_comments' , ],
|
|
|
2009 |
[ 'remove_f90_comments' , ], ],
|
|
|
2010 |
'HTML' => [ [ 'remove_html_comments', ],
|
|
|
2011 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
2012 |
'Haskell' => [ [ 'remove_haskell_comments', '>filename<' ], ],
|
|
|
2013 |
'IDL' => [ [ 'remove_matches' , '^\s*;' ], ],
|
|
|
2014 |
'JSP' => [ [ 'remove_html_comments', ],
|
|
|
2015 |
[ 'call_regexp_common' , 'HTML' ],
|
|
|
2016 |
[ 'remove_jsp_comments', ], ],
|
|
|
2017 |
'Java' => [
|
|
|
2018 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
2019 |
[ 'call_regexp_common' , 'C' ],
|
|
|
2020 |
],
|
|
|
2021 |
'Javascript' => [
|
|
|
2022 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
2023 |
[ 'call_regexp_common' , 'C' ],
|
|
|
2024 |
],
|
|
|
2025 |
'JCL' => [ [ 'remove_jcl_comments' , ], ],
|
|
|
2026 |
'Lisp' => [ [ 'remove_matches' , '^\s*;' ], ],
|
|
|
2027 |
'LiveLink OScript' => [ [ 'remove_matches' , '^\s*//' ], ],
|
|
|
2028 |
'Lua' => [ [ 'call_regexp_common' , 'lua' ], ],
|
|
|
2029 |
'make' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2030 |
'MATLAB' => [ [ 'remove_matches' , '^\s*%' ], ],
|
|
|
2031 |
'Modula3' => [ [ 'call_regexp_common' , 'Pascal' ], ],
|
|
|
2032 |
# Modula 3 comments are (* ... *) so applying the Pascal filter
|
|
|
2033 |
# which also treats { ... } as a comment is not really correct.
|
|
|
2034 |
'Objective C' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
2035 |
'MATLAB/Objective C/MUMPS' => [ [ 'die' , ], ], # never called
|
|
|
2036 |
'MUMPS' => [ [ 'remove_matches' , '^\s*;' ], ],
|
|
|
2037 |
'Octave' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2038 |
'Oracle Forms' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
2039 |
'Oracle Reports' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
2040 |
'Pascal' => [ [ 'call_regexp_common' , 'Pascal' ], ],
|
|
|
2041 |
'Patran Command Language'=> [
|
|
|
2042 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
2043 |
[ 'remove_matches' , '^\s*\$#' ],
|
|
|
2044 |
[ 'call_regexp_common' , 'C' ],
|
|
|
2045 |
],
|
|
|
2046 |
'Perl' => [ [ 'remove_below' , '^__(END|DATA)__'],
|
|
|
2047 |
[ 'remove_matches' , '^\s*#' ], ],
|
|
|
2048 |
'Python' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2049 |
'PHP' => [
|
|
|
2050 |
[ 'remove_matches' , '^\s*#' ],
|
|
|
2051 |
[ 'remove_matches' , '^\s*//' ],
|
|
|
2052 |
[ 'call_regexp_common' , 'C' ],
|
|
|
2053 |
],
|
|
|
2054 |
'Rexx' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
2055 |
'Ruby' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2056 |
'SQL' => [
|
|
|
2057 |
[ 'remove_matches' , '^\s*--' ],
|
|
|
2058 |
[ 'call_regexp_common' , 'C' ],
|
|
|
2059 |
],
|
|
|
2060 |
'sed' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2061 |
'Bourne Again Shell' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2062 |
'Bourne Shell' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2063 |
'm4' => [ [ 'remove_matches' , '^dnl ' ], ],
|
|
|
2064 |
'C Shell' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2065 |
'Korn Shell' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2066 |
'Tcl/Tk' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2067 |
'Teamcenter def' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2068 |
'Teamcenter met' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
2069 |
'Teamcenter mth' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2070 |
'Softbridge Basic' => [ [ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ],
|
|
|
2071 |
[ 'remove_matches' , '^\s*Attribute\s+'],
|
|
|
2072 |
[ 'remove_matches' , '^\s*\47'], ], # \47 = '
|
|
|
2073 |
'vim script' => [ [ 'remove_matches' , '^\s*"' ], ],
|
|
|
2074 |
'Visual Basic' => [ [ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ],
|
|
|
2075 |
[ 'remove_matches' , '^\s*Attribute\s+'],
|
|
|
2076 |
[ 'remove_matches' , '^\s*\47'], ], # \47 = '
|
|
|
2077 |
'yacc' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
2078 |
'YAML' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
|
2079 |
'lex' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
|
2080 |
'XML' => [ [ 'remove_html_comments', ],
|
|
|
2081 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
2082 |
'XSD' => [ [ 'remove_html_comments', ],
|
|
|
2083 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
2084 |
'XSLT' => [ [ 'remove_html_comments', ],
|
|
|
2085 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
2086 |
'NAnt scripts' => [ [ 'remove_html_comments', ],
|
|
|
2087 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
2088 |
'MSBuild scripts' => [ [ 'remove_html_comments', ],
|
|
|
2089 |
[ 'call_regexp_common' , 'HTML' ], ],
|
|
|
2090 |
);
|
|
|
2091 |
# 1}}}
|
|
|
2092 |
%{$rh_Not_Code_Extension} = ( # {{{1
|
|
|
2093 |
'1' => 1, # Man pages (documentation):
|
|
|
2094 |
'2' => 1,
|
|
|
2095 |
'3' => 1,
|
|
|
2096 |
'4' => 1,
|
|
|
2097 |
'5' => 1,
|
|
|
2098 |
'6' => 1,
|
|
|
2099 |
'7' => 1,
|
|
|
2100 |
'8' => 1,
|
|
|
2101 |
'9' => 1,
|
|
|
2102 |
'a' => 1, # Static object code.
|
|
|
2103 |
'ad' => 1, # X application default resource file.
|
|
|
2104 |
'afm' => 1, # font metrics
|
|
|
2105 |
'arc' => 1, # arc(1) archive
|
|
|
2106 |
'arj' => 1, # arj(1) archive
|
|
|
2107 |
'au' => 1, # Audio sound filearj(1) archive
|
|
|
2108 |
'bak' => 1, # Backup files - we only want to count the "real" files.
|
|
|
2109 |
'bdf' => 1,
|
|
|
2110 |
'bmp' => 1,
|
|
|
2111 |
'bz2' => 1, # bzip2(1) compressed file
|
|
|
2112 |
'csv' => 1, # comma separated values
|
|
|
2113 |
'desktop' => 1,
|
|
|
2114 |
'dic' => 1,
|
|
|
2115 |
'doc' => 1,
|
|
|
2116 |
'elc' => 1,
|
|
|
2117 |
'eps' => 1,
|
|
|
2118 |
'fig' => 1,
|
|
|
2119 |
'gif' => 1,
|
|
|
2120 |
'gz' => 1,
|
|
|
2121 |
'hdf' => 1, # hierarchical data format
|
|
|
2122 |
'in' => 1, # Debatable.
|
|
|
2123 |
'jpg' => 1,
|
|
|
2124 |
'kdelnk' => 1,
|
|
|
2125 |
'man' => 1,
|
|
|
2126 |
'mf' => 1,
|
|
|
2127 |
'mp3' => 1,
|
|
|
2128 |
'n' => 1,
|
|
|
2129 |
'o' => 1, # Object code is generated from source code.
|
|
|
2130 |
'pbm' => 1,
|
|
|
2131 |
'pdf' => 1,
|
|
|
2132 |
'pfb' => 1,
|
|
|
2133 |
'png' => 1,
|
|
|
2134 |
'po' => 1,
|
|
|
2135 |
'ps' => 1, # Postscript is _USUALLY_ generated automatically.
|
|
|
2136 |
'sgm' => 1,
|
|
|
2137 |
'sgml' => 1,
|
|
|
2138 |
'so' => 1, # Dynamically-loaded object code.
|
|
|
2139 |
'Tag' => 1,
|
|
|
2140 |
'tex' => 1,
|
|
|
2141 |
'text' => 1,
|
|
|
2142 |
'tfm' => 1,
|
|
|
2143 |
'tgz' => 1, # gzipped tarball
|
|
|
2144 |
'tiff' => 1,
|
|
|
2145 |
'txt' => 1,
|
|
|
2146 |
'vf' => 1,
|
|
|
2147 |
'wav' => 1,
|
|
|
2148 |
'xbm' => 1,
|
|
|
2149 |
'xpm' => 1,
|
|
|
2150 |
'Y' => 1, # file compressed with "Yabba"
|
|
|
2151 |
'Z' => 1, # file compressed with "compress"
|
|
|
2152 |
'zip' => 1, # zip archive
|
|
|
2153 |
); # 1}}}
|
|
|
2154 |
%{$rh_Not_Code_Filename} = ( # {{{1
|
|
|
2155 |
'AUTHORS' => 1,
|
|
|
2156 |
'README' => 1,
|
|
|
2157 |
'Readme' => 1,
|
|
|
2158 |
'readme' => 1,
|
|
|
2159 |
'README.tk' => 1, # used in kdemultimedia, it's confusing.
|
|
|
2160 |
'Changelog' => 1,
|
|
|
2161 |
'ChangeLog' => 1,
|
|
|
2162 |
'Repository' => 1,
|
|
|
2163 |
'CHANGES' => 1,
|
|
|
2164 |
'Changes' => 1,
|
|
|
2165 |
'.cvsignore' => 1,
|
|
|
2166 |
'Root' => 1, # CVS
|
|
|
2167 |
'BUGS' => 1,
|
|
|
2168 |
'TODO' => 1,
|
|
|
2169 |
'COPYING' => 1,
|
|
|
2170 |
'MAINTAINERS' => 1,
|
|
|
2171 |
'Entries' => 1,
|
|
|
2172 |
'iconfig.h' => 1, # Skip "iconfig.h" files; they're used in Imakefiles
|
|
|
2173 |
# (used in xlockmore):
|
|
|
2174 |
);
|
|
|
2175 |
# 1}}}
|
|
|
2176 |
%{$rh_Scale_Factor} = ( # {{{1
|
|
|
2177 |
'1032/af' => 5.00,
|
|
|
2178 |
'1st generation default' => 0.25,
|
|
|
2179 |
'2nd generation default' => 0.75,
|
|
|
2180 |
'3rd generation default' => 1.00,
|
|
|
2181 |
'4th generation default' => 4.00,
|
|
|
2182 |
'5th generation default' => 16.00,
|
|
|
2183 |
'aas macro' => 0.88,
|
|
|
2184 |
'abap/4' => 5.00,
|
|
|
2185 |
'ABAP' => 5.00,
|
|
|
2186 |
'accel' => 4.21,
|
|
|
2187 |
'access' => 2.11,
|
|
|
2188 |
'actor' => 3.81,
|
|
|
2189 |
'acumen' => 2.86,
|
|
|
2190 |
'Ada' => 0.52,
|
|
|
2191 |
'Ada 83' => 1.13,
|
|
|
2192 |
'Ada 95' => 1.63,
|
|
|
2193 |
'adr/dl' => 2.00,
|
|
|
2194 |
'adr/ideal/pdl' => 4.00,
|
|
|
2195 |
'ads/batch' => 4.00,
|
|
|
2196 |
'ads/online' => 4.00,
|
|
|
2197 |
'ADSO/IDSM' => 3.00,
|
|
|
2198 |
'advantage' => 2.11,
|
|
|
2199 |
'ai shell default' => 1.63,
|
|
|
2200 |
'ai shells' => 1.63,
|
|
|
2201 |
'algol 68' => 0.75,
|
|
|
2202 |
'algol w' => 0.75,
|
|
|
2203 |
'ambush' => 2.50,
|
|
|
2204 |
'aml' => 1.63,
|
|
|
2205 |
'amppl ii' => 1.25,
|
|
|
2206 |
'ansi basic' => 1.25,
|
|
|
2207 |
'ansi cobol 74' => 0.75,
|
|
|
2208 |
'ansi cobol 85' => 0.88,
|
|
|
2209 |
'SQL' => 6.15,
|
|
|
2210 |
'answer/db' => 6.15,
|
|
|
2211 |
'apl 360/370' => 2.50,
|
|
|
2212 |
'apl default' => 2.50,
|
|
|
2213 |
'apl*plus' => 2.50,
|
|
|
2214 |
'applesoft basic' => 0.63,
|
|
|
2215 |
'application builder' => 4.00,
|
|
|
2216 |
'application manager' => 2.22,
|
|
|
2217 |
'aps' => 0.96,
|
|
|
2218 |
'aps' => 4.71,
|
|
|
2219 |
'apt' => 1.13,
|
|
|
2220 |
'aptools' => 4.00,
|
|
|
2221 |
'arc' => 1.63,
|
|
|
2222 |
'ariel' => 0.75,
|
|
|
2223 |
'arity' => 1.63,
|
|
|
2224 |
'arity prolog' => 1.25,
|
|
|
2225 |
'art' => 1.63,
|
|
|
2226 |
'art enterprise' => 1.74,
|
|
|
2227 |
'artemis' => 2.00,
|
|
|
2228 |
'artim' => 1.74,
|
|
|
2229 |
'as/set' => 4.21,
|
|
|
2230 |
'asi/inquiry' => 6.15,
|
|
|
2231 |
'ask windows' => 1.74,
|
|
|
2232 |
'asa' => 1.29,
|
|
|
2233 |
'ASP' => 1.29,
|
|
|
2234 |
'ASP.Net' => 1.29,
|
|
|
2235 |
'aspx' => 1.29,
|
|
|
2236 |
#'resx' => 1.29,
|
|
|
2237 |
'asax' => 1.29,
|
|
|
2238 |
'ascx' => 1.29,
|
|
|
2239 |
'asmx' => 1.29,
|
|
|
2240 |
'config' => 1.29,
|
|
|
2241 |
'webinfo' => 1.29,
|
|
|
2242 |
'CCS' => 5.33,
|
|
|
2243 |
|
|
|
2244 |
# 'assembler (basic)' => 0.25,
|
|
|
2245 |
'Assembler' => 0.25,
|
|
|
2246 |
|
|
|
2247 |
'assembler (macro)' => 0.51,
|
|
|
2248 |
'associative default' => 1.25,
|
|
|
2249 |
'autocoder' => 0.25,
|
|
|
2250 |
'awk' => 3.81,
|
|
|
2251 |
'aztec c' => 0.63,
|
|
|
2252 |
'balm' => 0.75,
|
|
|
2253 |
'base sas' => 1.51,
|
|
|
2254 |
'basic' => 0.75,
|
|
|
2255 |
'basic a' => 0.63,
|
|
|
2256 |
# 'basic assembly' => 0.25,
|
|
|
2257 |
'bc' => 1.50,
|
|
|
2258 |
'berkeley pascal' => 0.88,
|
|
|
2259 |
'better basic' => 0.88,
|
|
|
2260 |
'bliss' => 0.75,
|
|
|
2261 |
'bmsgen' => 2.22,
|
|
|
2262 |
'boeingcalc' => 13.33,
|
|
|
2263 |
'bteq' => 6.15,
|
|
|
2264 |
|
|
|
2265 |
'C' => 0.77,
|
|
|
2266 |
|
|
|
2267 |
'c set 2' => 0.88,
|
|
|
2268 |
|
|
|
2269 |
'C#' => 1.36,
|
|
|
2270 |
|
|
|
2271 |
'C++' => 1.51,
|
|
|
2272 |
|
|
|
2273 |
'c86plus' => 0.63,
|
|
|
2274 |
'cadbfast' => 2.00,
|
|
|
2275 |
'caearl' => 2.86,
|
|
|
2276 |
'cast' => 1.63,
|
|
|
2277 |
'cbasic' => 0.88,
|
|
|
2278 |
'cdadl' => 4.00,
|
|
|
2279 |
'cellsim' => 1.74,
|
|
|
2280 |
'ColdFusion' => 4.00,
|
|
|
2281 |
'chili' => 0.75,
|
|
|
2282 |
'chill' => 0.75,
|
|
|
2283 |
'cics' => 1.74,
|
|
|
2284 |
'clarion' => 1.38,
|
|
|
2285 |
'clascal' => 1.00,
|
|
|
2286 |
'cli' => 2.50,
|
|
|
2287 |
'clipper' => 2.05,
|
|
|
2288 |
'clipper db' => 2.00,
|
|
|
2289 |
'clos' => 3.81,
|
|
|
2290 |
'clout' => 2.00,
|
|
|
2291 |
'cms2' => 0.75,
|
|
|
2292 |
'cmsgen' => 4.21,
|
|
|
2293 |
'COBOL' => 1.04,
|
|
|
2294 |
'COBOL ii' => 0.75,
|
|
|
2295 |
'COBOL/400' => 0.88,
|
|
|
2296 |
'cobra' => 4.00,
|
|
|
2297 |
'codecenter' => 2.22,
|
|
|
2298 |
'cofac' => 2.22,
|
|
|
2299 |
'cogen' => 2.22,
|
|
|
2300 |
'cognos' => 2.22,
|
|
|
2301 |
'cogo' => 1.13,
|
|
|
2302 |
'comal' => 1.00,
|
|
|
2303 |
'comit ii' => 1.25,
|
|
|
2304 |
'common lisp' => 1.25,
|
|
|
2305 |
'concurrent pascal' => 1.00,
|
|
|
2306 |
'conniver' => 1.25,
|
|
|
2307 |
'cool:gen/ief' => 2.58,
|
|
|
2308 |
'coral 66' => 0.75,
|
|
|
2309 |
'corvet' => 4.21,
|
|
|
2310 |
'corvision' => 5.33,
|
|
|
2311 |
'cpl' => 0.50,
|
|
|
2312 |
'Crystal Reports' => 4.00,
|
|
|
2313 |
'csl' => 1.63,
|
|
|
2314 |
'csp' => 1.51,
|
|
|
2315 |
'cssl' => 1.74,
|
|
|
2316 |
|
|
|
2317 |
'CSS' => 1.0,
|
|
|
2318 |
|
|
|
2319 |
'culprit' => 1.57,
|
|
|
2320 |
'cxpert' => 1.63,
|
|
|
2321 |
'cygnet' => 4.21,
|
|
|
2322 |
'DAL' => 1.50,
|
|
|
2323 |
'data base default' => 2.00,
|
|
|
2324 |
'dataflex' => 2.00,
|
|
|
2325 |
'datatrieve' => 4.00,
|
|
|
2326 |
'dbase iii' => 2.00,
|
|
|
2327 |
'dbase iv' => 1.54,
|
|
|
2328 |
'dcl' => 0.38,
|
|
|
2329 |
'decision support default' => 2.22,
|
|
|
2330 |
'decrally' => 2.00,
|
|
|
2331 |
'delphi' => 2.76,
|
|
|
2332 |
'dl/1' => 2.00,
|
|
|
2333 |
'NASTRAN DMAP' => 2.35,
|
|
|
2334 |
'dna4' => 4.21,
|
|
|
2335 |
'DOS Batch' => 0.63,
|
|
|
2336 |
'dsp assembly' => 0.50,
|
|
|
2337 |
'dtabl' => 1.74,
|
|
|
2338 |
'dtipt' => 1.74,
|
|
|
2339 |
'dyana' => 1.13,
|
|
|
2340 |
'dynamoiii' => 1.74,
|
|
|
2341 |
'easel' => 2.76,
|
|
|
2342 |
'easy' => 1.63,
|
|
|
2343 |
'easytrieve+' => 2.35,
|
|
|
2344 |
'eclipse' => 1.63,
|
|
|
2345 |
'eda/sql' => 6.67,
|
|
|
2346 |
'edscheme 3.4' => 1.51,
|
|
|
2347 |
'eiffel' => 3.81,
|
|
|
2348 |
'enform' => 1.74,
|
|
|
2349 |
'englishbased default' => 1.51,
|
|
|
2350 |
'ensemble' => 2.76,
|
|
|
2351 |
'epos' => 4.00,
|
|
|
2352 |
'erlang' => 2.00,
|
|
|
2353 |
'esf' => 2.00,
|
|
|
2354 |
'espadvisor' => 1.63,
|
|
|
2355 |
'espl/i' => 1.13,
|
|
|
2356 |
'euclid' => 0.75,
|
|
|
2357 |
'excel' => 1.74,
|
|
|
2358 |
'excel 12' => 13.33,
|
|
|
2359 |
'excel 34' => 13.33,
|
|
|
2360 |
'excel 5' => 13.33,
|
|
|
2361 |
'express' => 2.22,
|
|
|
2362 |
'exsys' => 1.63,
|
|
|
2363 |
'extended common lisp' => 1.43,
|
|
|
2364 |
'eznomad' => 2.22,
|
|
|
2365 |
'facets' => 4.00,
|
|
|
2366 |
'factorylink iv' => 2.76,
|
|
|
2367 |
'fame' => 2.22,
|
|
|
2368 |
'filemaker pro' => 2.22,
|
|
|
2369 |
'flavors' => 2.76,
|
|
|
2370 |
'flex' => 1.74,
|
|
|
2371 |
'flexgen' => 2.76,
|
|
|
2372 |
'Focus' => 1.90,
|
|
|
2373 |
'foil' => 1.51,
|
|
|
2374 |
'forte' => 4.44,
|
|
|
2375 |
'forth' => 1.25,
|
|
|
2376 |
'Fortran 66' => 0.63,
|
|
|
2377 |
'Fortran 77' => 0.75,
|
|
|
2378 |
'Fortran 90' => 1.00,
|
|
|
2379 |
'Fortran 95' => 1.13,
|
|
|
2380 |
'Fortran II' => 0.63,
|
|
|
2381 |
'foundation' => 2.76,
|
|
|
2382 |
'foxpro' => 2.29,
|
|
|
2383 |
'foxpro 1' => 2.00,
|
|
|
2384 |
'foxpro 2.5' => 2.35,
|
|
|
2385 |
'framework' => 13.33,
|
|
|
2386 |
'g2' => 1.63,
|
|
|
2387 |
'gamma' => 5.00,
|
|
|
2388 |
'genascript' => 2.96,
|
|
|
2389 |
'gener/ol' => 6.15,
|
|
|
2390 |
'genexus' => 5.33,
|
|
|
2391 |
'genifer' => 4.21,
|
|
|
2392 |
'geode 2.0' => 5.00,
|
|
|
2393 |
'gfa basic' => 2.35,
|
|
|
2394 |
'gml' => 1.74,
|
|
|
2395 |
'golden common lisp' => 1.25,
|
|
|
2396 |
'gpss' => 1.74,
|
|
|
2397 |
'guest' => 2.86,
|
|
|
2398 |
'guru' => 1.63,
|
|
|
2399 |
'gw basic' => 0.82,
|
|
|
2400 |
'Haskell' => 2.11,
|
|
|
2401 |
'high c' => 0.63,
|
|
|
2402 |
'hlevel' => 1.38,
|
|
|
2403 |
'hp basic' => 0.63,
|
|
|
2404 |
|
|
|
2405 |
'HTML' => 1.90 ,
|
|
|
2406 |
'XML' => 1.90 ,
|
|
|
2407 |
'XSLT' => 1.90 ,
|
|
|
2408 |
'DTD' => 1.90 ,
|
|
|
2409 |
'XSD' => 1.90 ,
|
|
|
2410 |
'NAnt scripts' => 1.90 ,
|
|
|
2411 |
'MSBuild scripts' => 1.90 ,
|
|
|
2412 |
|
|
|
2413 |
'HTML 2' => 5.00,
|
|
|
2414 |
'HTML 3' => 5.33,
|
|
|
2415 |
'huron' => 5.00,
|
|
|
2416 |
'ibm adf i' => 4.00,
|
|
|
2417 |
'ibm adf ii' => 4.44,
|
|
|
2418 |
'ibm advanced basic' => 0.82,
|
|
|
2419 |
'ibm cics/vs' => 2.00,
|
|
|
2420 |
'ibm compiled basic' => 0.88,
|
|
|
2421 |
'ibm vs cobol' => 0.75,
|
|
|
2422 |
'ibm vs cobol ii' => 0.88,
|
|
|
2423 |
'ices' => 1.13,
|
|
|
2424 |
'icon' => 1.00,
|
|
|
2425 |
'ideal' => 1.54,
|
|
|
2426 |
'idms' => 2.00,
|
|
|
2427 |
'ief' => 5.71,
|
|
|
2428 |
'ief/cool:gen' => 2.58,
|
|
|
2429 |
'iew' => 5.71,
|
|
|
2430 |
'ifps/plus' => 2.50,
|
|
|
2431 |
'imprs' => 2.00,
|
|
|
2432 |
'informix' => 2.58,
|
|
|
2433 |
'ingres' => 2.00,
|
|
|
2434 |
'inquire' => 6.15,
|
|
|
2435 |
'insight2' => 1.63,
|
|
|
2436 |
'install/1' => 5.00,
|
|
|
2437 |
'intellect' => 1.51,
|
|
|
2438 |
'interlisp' => 1.38,
|
|
|
2439 |
'interpreted basic' => 0.75,
|
|
|
2440 |
'interpreted c' => 0.63,
|
|
|
2441 |
'iqlisp' => 1.38,
|
|
|
2442 |
'iqrp' => 6.15,
|
|
|
2443 |
'j2ee' => 1.60,
|
|
|
2444 |
'janus' => 1.13,
|
|
|
2445 |
'Java' => 1.36,
|
|
|
2446 |
'Javascript' => 1.48,
|
|
|
2447 |
'JSP' => 1.48,
|
|
|
2448 |
'JCL' => 1.67,
|
|
|
2449 |
'joss' => 0.75,
|
|
|
2450 |
'jovial' => 0.75,
|
|
|
2451 |
'jsp' => 1.36,
|
|
|
2452 |
'kappa' => 2.00,
|
|
|
2453 |
'kbms' => 1.63,
|
|
|
2454 |
'kcl' => 1.25,
|
|
|
2455 |
'kee' => 1.63,
|
|
|
2456 |
'keyplus' => 2.00,
|
|
|
2457 |
'kl' => 1.25,
|
|
|
2458 |
'klo' => 1.25,
|
|
|
2459 |
'knowol' => 1.63,
|
|
|
2460 |
'krl' => 1.38,
|
|
|
2461 |
'Korn Shell' => 3.81,
|
|
|
2462 |
'ladder logic' => 2.22,
|
|
|
2463 |
'lambit/l' => 1.25,
|
|
|
2464 |
'lattice c' => 0.63,
|
|
|
2465 |
'liana' => 0.63,
|
|
|
2466 |
'lilith' => 1.13,
|
|
|
2467 |
'linc ii' => 5.71,
|
|
|
2468 |
'Lisp' => 1.25,
|
|
|
2469 |
'LiveLink OScript' => 3.5 ,
|
|
|
2470 |
'loglisp' => 1.38,
|
|
|
2471 |
'loops' => 3.81,
|
|
|
2472 |
'lotus 123 dos' => 13.33,
|
|
|
2473 |
'lotus macros' => 0.75,
|
|
|
2474 |
'lotus notes' => 3.64,
|
|
|
2475 |
'lucid 3d' => 13.33,
|
|
|
2476 |
'lyric' => 1.51,
|
|
|
2477 |
'm' => 5.00,
|
|
|
2478 |
'macforth' => 1.25,
|
|
|
2479 |
'mach1' => 2.00,
|
|
|
2480 |
'machine language' => 0.13,
|
|
|
2481 |
'maestro' => 5.00,
|
|
|
2482 |
'magec' => 5.00,
|
|
|
2483 |
'magik' => 3.81,
|
|
|
2484 |
'Lake' => 3.81,
|
|
|
2485 |
'make' => 2.50,
|
|
|
2486 |
'mantis' => 2.96,
|
|
|
2487 |
'mapper' => 0.99,
|
|
|
2488 |
'mark iv' => 2.00,
|
|
|
2489 |
'mark v' => 2.22,
|
|
|
2490 |
'mathcad' => 16.00,
|
|
|
2491 |
'mdl' => 2.22,
|
|
|
2492 |
'mentor' => 1.51,
|
|
|
2493 |
'mesa' => 0.75,
|
|
|
2494 |
'microfocus cobol' => 1.00,
|
|
|
2495 |
'microforth' => 1.25,
|
|
|
2496 |
'microsoft c' => 0.63,
|
|
|
2497 |
'microstep' => 4.00,
|
|
|
2498 |
'miranda' => 2.00,
|
|
|
2499 |
'model 204' => 2.11,
|
|
|
2500 |
'modula 2' => 1.00,
|
|
|
2501 |
'mosaic' => 13.33,
|
|
|
2502 |
# 'ms c ++ v. 7' => 1.51,
|
|
|
2503 |
'ms compiled basic' => 0.88,
|
|
|
2504 |
'msl' => 1.25,
|
|
|
2505 |
'mulisp' => 1.25,
|
|
|
2506 |
'MUMPS' => 4.21,
|
|
|
2507 |
'Nastran' => 1.13,
|
|
|
2508 |
'natural' => 1.54,
|
|
|
2509 |
'natural 1' => 1.51,
|
|
|
2510 |
'natural 2' => 1.74,
|
|
|
2511 |
'natural construct' => 3.20,
|
|
|
2512 |
'natural language' => 0.03,
|
|
|
2513 |
'netron/cap' => 4.21,
|
|
|
2514 |
'nexpert' => 1.63,
|
|
|
2515 |
'nial' => 1.63,
|
|
|
2516 |
'nomad2' => 2.00,
|
|
|
2517 |
'nonprocedural default' => 2.22,
|
|
|
2518 |
'notes vip' => 2.22,
|
|
|
2519 |
'nroff' => 1.51,
|
|
|
2520 |
'object assembler' => 1.25,
|
|
|
2521 |
'object lisp' => 2.76,
|
|
|
2522 |
'object logo' => 2.76,
|
|
|
2523 |
'object pascal' => 2.76,
|
|
|
2524 |
'object star' => 5.00,
|
|
|
2525 |
'Objective C' => 2.96,
|
|
|
2526 |
'objectoriented default' => 2.76,
|
|
|
2527 |
'objectview' => 3.20,
|
|
|
2528 |
'ogl' => 1.00,
|
|
|
2529 |
'omnis 7' => 2.00,
|
|
|
2530 |
'oodl' => 2.76,
|
|
|
2531 |
'ops' => 1.74,
|
|
|
2532 |
'ops5' => 1.38,
|
|
|
2533 |
'oracle' => 2.76,
|
|
|
2534 |
'Oracle Reports' => 2.76,
|
|
|
2535 |
'Oracle Forms' => 2.67,
|
|
|
2536 |
'Oracle Developer/2000' => 3.48,
|
|
|
2537 |
'oscar' => 0.75,
|
|
|
2538 |
'pacbase' => 1.67,
|
|
|
2539 |
'pace' => 2.00,
|
|
|
2540 |
'paradox/pal' => 2.22,
|
|
|
2541 |
'Pascal' => 0.88,
|
|
|
2542 |
'Patran Command Language' => 2.50,
|
|
|
2543 |
'pc focus' => 2.22,
|
|
|
2544 |
'pdl millenium' => 3.81,
|
|
|
2545 |
'pdp11 ade' => 1.51,
|
|
|
2546 |
'peoplesoft' => 2.50,
|
|
|
2547 |
'Perl' => 4.00,
|
|
|
2548 |
'persistance object builder' => 3.81,
|
|
|
2549 |
'pilot' => 1.51,
|
|
|
2550 |
'pl/1' => 1.38,
|
|
|
2551 |
'pl/m' => 1.13,
|
|
|
2552 |
'pl/s' => 0.88,
|
|
|
2553 |
'pl/sql' => 2.58,
|
|
|
2554 |
'planit' => 1.51,
|
|
|
2555 |
'planner' => 1.25,
|
|
|
2556 |
'planperfect 1' => 11.43,
|
|
|
2557 |
'plato' => 1.51,
|
|
|
2558 |
'polyforth' => 1.25,
|
|
|
2559 |
'pop' => 1.38,
|
|
|
2560 |
'poplog' => 1.38,
|
|
|
2561 |
'power basic' => 1.63,
|
|
|
2562 |
'powerbuilder' => 3.33,
|
|
|
2563 |
'powerhouse' => 5.71,
|
|
|
2564 |
'ppl (plus)' => 2.00,
|
|
|
2565 |
'problemoriented default' => 1.13,
|
|
|
2566 |
'proc' => 2.96,
|
|
|
2567 |
'procedural default' => 0.75,
|
|
|
2568 |
'professional pascal' => 0.88,
|
|
|
2569 |
'program generator default' => 5.00,
|
|
|
2570 |
'progress v4' => 2.22,
|
|
|
2571 |
'proiv' => 1.38,
|
|
|
2572 |
'prolog' => 1.25,
|
|
|
2573 |
'prose' => 0.75,
|
|
|
2574 |
'proteus' => 0.75,
|
|
|
2575 |
'qbasic' => 1.38,
|
|
|
2576 |
'qbe' => 6.15,
|
|
|
2577 |
'qmf' => 5.33,
|
|
|
2578 |
'qnial' => 1.63,
|
|
|
2579 |
'quattro' => 13.33,
|
|
|
2580 |
'quattro pro' => 13.33,
|
|
|
2581 |
'query default' => 6.15,
|
|
|
2582 |
'quick basic 1' => 1.25,
|
|
|
2583 |
'quick basic 2' => 1.31,
|
|
|
2584 |
'quick basic 3' => 1.38,
|
|
|
2585 |
'quick c' => 0.63,
|
|
|
2586 |
'quickbuild' => 2.86,
|
|
|
2587 |
'quiz' => 5.33,
|
|
|
2588 |
'rally' => 2.00,
|
|
|
2589 |
'ramis ii' => 2.00,
|
|
|
2590 |
'rapidgen' => 2.86,
|
|
|
2591 |
'ratfor' => 0.88,
|
|
|
2592 |
'rdb' => 2.00,
|
|
|
2593 |
'realia' => 1.74,
|
|
|
2594 |
'realizer 1.0' => 2.00,
|
|
|
2595 |
'realizer 2.0' => 2.22,
|
|
|
2596 |
'relate/3000' => 2.00,
|
|
|
2597 |
'reuse default' => 16.00,
|
|
|
2598 |
'Rexx' => 1.19,
|
|
|
2599 |
'Rexx (mvs)' => 1.00,
|
|
|
2600 |
'Rexx (os/2)' => 1.74,
|
|
|
2601 |
'rm basic' => 0.88,
|
|
|
2602 |
'rm cobol' => 0.75,
|
|
|
2603 |
'rm fortran' => 0.75,
|
|
|
2604 |
'rpg i' => 1.00,
|
|
|
2605 |
'rpg ii' => 1.63,
|
|
|
2606 |
'rpg iii' => 1.63,
|
|
|
2607 |
'rtexpert 1.4' => 1.38,
|
|
|
2608 |
'sabretalk' => 0.90,
|
|
|
2609 |
'sail' => 0.75,
|
|
|
2610 |
'sapiens' => 5.00,
|
|
|
2611 |
'sas' => 1.95,
|
|
|
2612 |
'savvy' => 6.15,
|
|
|
2613 |
'sbasic' => 0.88,
|
|
|
2614 |
'sceptre' => 1.13,
|
|
|
2615 |
'scheme' => 1.51,
|
|
|
2616 |
'screen painter default' => 13.33,
|
|
|
2617 |
'sequal' => 6.67,
|
|
|
2618 |
'Bourne Shell' => 3.81,
|
|
|
2619 |
'Bourne Again Shell' => 3.81,
|
|
|
2620 |
'ksh' => 3.81,
|
|
|
2621 |
'C Shell' => 3.81,
|
|
|
2622 |
'siebel tools ' => 6.15,
|
|
|
2623 |
'simplan' => 2.22,
|
|
|
2624 |
'simscript' => 1.74,
|
|
|
2625 |
'simula' => 1.74,
|
|
|
2626 |
'simula 67' => 1.74,
|
|
|
2627 |
'simulation default' => 1.74,
|
|
|
2628 |
'slogan' => 0.98,
|
|
|
2629 |
'smalltalk' => 2.50,
|
|
|
2630 |
'smalltalk 286' => 3.81,
|
|
|
2631 |
'smalltalk 80' => 3.81,
|
|
|
2632 |
'smalltalk/v' => 3.81,
|
|
|
2633 |
'snap' => 1.00,
|
|
|
2634 |
'snobol24' => 0.63,
|
|
|
2635 |
'softscreen' => 5.71,
|
|
|
2636 |
'Softbridge Basic' => 2.76,
|
|
|
2637 |
'solo' => 1.38,
|
|
|
2638 |
'speakeasy' => 2.22,
|
|
|
2639 |
'spinnaker ppl' => 2.22,
|
|
|
2640 |
'splus' => 2.50,
|
|
|
2641 |
'spreadsheet default' => 13.33,
|
|
|
2642 |
'sps' => 0.25,
|
|
|
2643 |
'spss' => 2.50,
|
|
|
2644 |
'SQL' => 2.29,
|
|
|
2645 |
'sqlwindows' => 6.67,
|
|
|
2646 |
'statistical default' => 2.50,
|
|
|
2647 |
'strategem' => 2.22,
|
|
|
2648 |
'stress' => 1.13,
|
|
|
2649 |
'strongly typed default' => 0.88,
|
|
|
2650 |
'style' => 1.74,
|
|
|
2651 |
'superbase 1.3' => 2.22,
|
|
|
2652 |
'surpass' => 13.33,
|
|
|
2653 |
'sybase' => 2.00,
|
|
|
2654 |
'symantec c++' => 2.76,
|
|
|
2655 |
'symbolang' => 1.25,
|
|
|
2656 |
'synchroworks' => 4.44,
|
|
|
2657 |
'synon/2e' => 4.21,
|
|
|
2658 |
'systemw' => 2.22,
|
|
|
2659 |
'tandem access language' => 0.88,
|
|
|
2660 |
'Tcl/Tk' => 1.25,
|
|
|
2661 |
'Teamcenter def' => 1.00,
|
|
|
2662 |
'Teamcenter met' => 1.00,
|
|
|
2663 |
'Teamcenter mth' => 1.00,
|
|
|
2664 |
'telon' => 5.00,
|
|
|
2665 |
'tessaract' => 2.00,
|
|
|
2666 |
'the twin' => 13.33,
|
|
|
2667 |
'themis' => 6.15,
|
|
|
2668 |
'tiief' => 5.71,
|
|
|
2669 |
'topspeed c++' => 2.76,
|
|
|
2670 |
'transform' => 5.33,
|
|
|
2671 |
'translisp plus' => 1.43,
|
|
|
2672 |
'treet' => 1.25,
|
|
|
2673 |
'treetran' => 1.25,
|
|
|
2674 |
'trs80 basic' => 0.63,
|
|
|
2675 |
'true basic' => 1.25,
|
|
|
2676 |
'turbo c' => 0.63,
|
|
|
2677 |
# 'turbo c++' => 1.51,
|
|
|
2678 |
'turbo expert' => 1.63,
|
|
|
2679 |
'turbo pascal >5' => 1.63,
|
|
|
2680 |
'turbo pascal 14' => 1.00,
|
|
|
2681 |
'turbo pascal 45' => 1.13,
|
|
|
2682 |
'turbo prolog' => 1.00,
|
|
|
2683 |
'turing' => 1.00,
|
|
|
2684 |
'tutor' => 1.51,
|
|
|
2685 |
'twaice' => 1.63,
|
|
|
2686 |
'ucsd pascal' => 0.88,
|
|
|
2687 |
'ufo/ims' => 2.22,
|
|
|
2688 |
'uhelp' => 2.50,
|
|
|
2689 |
'uniface' => 5.00,
|
|
|
2690 |
# 'unix shell scripts' => 3.81,
|
|
|
2691 |
'vax acms' => 1.38,
|
|
|
2692 |
'vax ade' => 2.00,
|
|
|
2693 |
'vbscript' => 2.35,
|
|
|
2694 |
'vectran' => 0.75,
|
|
|
2695 |
'vhdl ' => 4.21,
|
|
|
2696 |
'vim script' => 3.00,
|
|
|
2697 |
'visible c' => 1.63,
|
|
|
2698 |
'visible cobol' => 2.00,
|
|
|
2699 |
'visicalc 1' => 8.89,
|
|
|
2700 |
'visual 4.0' => 2.76,
|
|
|
2701 |
'visual basic' => 1.90,
|
|
|
2702 |
'visual basic 1' => 1.74,
|
|
|
2703 |
'visual basic 2' => 1.86,
|
|
|
2704 |
'visual basic 3' => 2.00,
|
|
|
2705 |
'visual basic 4' => 2.22,
|
|
|
2706 |
'visual basic 5' => 2.76,
|
|
|
2707 |
'Visual Basic' => 2.76,
|
|
|
2708 |
'visual basic dos' => 2.00,
|
|
|
2709 |
'visual c++' => 2.35,
|
|
|
2710 |
'visual cobol' => 4.00,
|
|
|
2711 |
'visual objects' => 5.00,
|
|
|
2712 |
'visualage' => 3.81,
|
|
|
2713 |
'visualgen' => 4.44,
|
|
|
2714 |
'vpf' => 0.84,
|
|
|
2715 |
'vsrexx' => 2.50,
|
|
|
2716 |
'vulcan' => 1.25,
|
|
|
2717 |
'vz programmer' => 2.22,
|
|
|
2718 |
'warp x' => 2.00,
|
|
|
2719 |
'watcom c' => 0.63,
|
|
|
2720 |
'watcom c/386' => 0.63,
|
|
|
2721 |
'waterloo c' => 0.63,
|
|
|
2722 |
'waterloo pascal' => 0.88,
|
|
|
2723 |
'watfiv' => 0.94,
|
|
|
2724 |
'watfor' => 0.88,
|
|
|
2725 |
'web scripts' => 5.33,
|
|
|
2726 |
'whip' => 0.88,
|
|
|
2727 |
'wizard' => 2.86,
|
|
|
2728 |
'xlisp' => 1.25,
|
|
|
2729 |
'yacc' => 1.51,
|
|
|
2730 |
'yacc++' => 1.51,
|
|
|
2731 |
'YAML' => 0.90,
|
|
|
2732 |
'zbasic' => 0.88,
|
|
|
2733 |
'zim' => 4.21,
|
|
|
2734 |
'zlisp' => 1.25,
|
|
|
2735 |
|
|
|
2736 |
'Expect' => 2.00,
|
|
|
2737 |
'C/C++ Header' => 1.00,
|
|
|
2738 |
'inc' => 1.00,
|
|
|
2739 |
'lex' => 1.00,
|
|
|
2740 |
'MATLAB' => 4.00,
|
|
|
2741 |
'IDL' => 3.80,
|
|
|
2742 |
'Octave' => 4.00,
|
|
|
2743 |
'ML' => 3.00,
|
|
|
2744 |
'Modula3' => 2.00,
|
|
|
2745 |
'PHP' => 3.50,
|
|
|
2746 |
'Python' => 4.20,
|
|
|
2747 |
'Ruby' => 4.20,
|
|
|
2748 |
'sed' => 4.00,
|
|
|
2749 |
'Lua' => 4.00,
|
|
|
2750 |
);
|
|
|
2751 |
# 1}}}
|
|
|
2752 |
%{$rh_Known_Binary_Archives} = ( # {{{1
|
|
|
2753 |
'.tar' => 1 ,
|
|
|
2754 |
'.tar.Z' => 1 ,
|
|
|
2755 |
'.tar.gz' => 1 ,
|
|
|
2756 |
'.tar.bz2' => 1 ,
|
|
|
2757 |
'.zip' => 1 ,
|
|
|
2758 |
'.Zip' => 1 ,
|
|
|
2759 |
'.ZIP' => 1 ,
|
|
|
2760 |
'.ear' => 1 , # Java
|
|
|
2761 |
'.war' => 1 , # contained within .ear
|
|
|
2762 |
);
|
|
|
2763 |
# 1}}}
|
|
|
2764 |
} # end sub set_constants()
|
|
|
2765 |
sub Install_Regexp_Common { # {{{1
|
|
|
2766 |
# Installs portions of Damian Conway's & Abigail's Regexp::Common
|
|
|
2767 |
# module, v2.120, into a temporary directory for the duration of
|
|
|
2768 |
# this run.
|
|
|
2769 |
|
|
|
2770 |
my %Regexp_Common_Contents = ();
|
|
|
2771 |
$Regexp_Common_Contents{'Common'} = <<'EOCommon'; # {{{2
|
|
|
2772 |
package Regexp::Common;
|
|
|
2773 |
|
|
|
2774 |
use 5.00473;
|
|
|
2775 |
use strict;
|
|
|
2776 |
|
|
|
2777 |
local $^W = 1;
|
|
|
2778 |
|
|
|
2779 |
use vars qw /$VERSION %RE %sub_interface $AUTOLOAD/;
|
|
|
2780 |
|
|
|
2781 |
($VERSION) = q $Revision: 2.120 $ =~ /([\d.]+)/;
|
|
|
2782 |
|
|
|
2783 |
|
|
|
2784 |
sub _croak {
|
|
|
2785 |
require Carp;
|
|
|
2786 |
goto &Carp::croak;
|
|
|
2787 |
}
|
|
|
2788 |
|
|
|
2789 |
sub _carp {
|
|
|
2790 |
require Carp;
|
|
|
2791 |
goto &Carp::carp;
|
|
|
2792 |
}
|
|
|
2793 |
|
|
|
2794 |
sub new {
|
|
|
2795 |
my ($class, @data) = @_;
|
|
|
2796 |
my %self;
|
|
|
2797 |
tie %self, $class, @data;
|
|
|
2798 |
return \%self;
|
|
|
2799 |
}
|
|
|
2800 |
|
|
|
2801 |
sub TIEHASH {
|
|
|
2802 |
my ($class, @data) = @_;
|
|
|
2803 |
bless \@data, $class;
|
|
|
2804 |
}
|
|
|
2805 |
|
|
|
2806 |
sub FETCH {
|
|
|
2807 |
my ($self, $extra) = @_;
|
|
|
2808 |
return bless ref($self)->new(@$self, $extra), ref($self);
|
|
|
2809 |
}
|
|
|
2810 |
|
|
|
2811 |
# Modification for cloc: only need a few modules from Regexp::Common.
|
|
|
2812 |
my %imports = map {$_ => "Regexp::Common::$_"}
|
|
|
2813 |
qw /balanced comment delimited /;
|
|
|
2814 |
#my %imports = map {$_ => "Regexp::Common::$_"}
|
|
|
2815 |
# qw /balanced CC comment delimited lingua list
|
|
|
2816 |
# net number profanity SEN URI whitespace
|
|
|
2817 |
# zip/;
|
|
|
2818 |
|
|
|
2819 |
sub import {
|
|
|
2820 |
shift; # Shift off the class.
|
|
|
2821 |
tie %RE, __PACKAGE__;
|
|
|
2822 |
{
|
|
|
2823 |
no strict 'refs';
|
|
|
2824 |
*{caller() . "::RE"} = \%RE;
|
|
|
2825 |
}
|
|
|
2826 |
|
|
|
2827 |
my $saw_import;
|
|
|
2828 |
my $no_defaults;
|
|
|
2829 |
my %exclude;
|
|
|
2830 |
foreach my $entry (grep {!/^RE_/} @_) {
|
|
|
2831 |
if ($entry eq 'pattern') {
|
|
|
2832 |
no strict 'refs';
|
|
|
2833 |
*{caller() . "::pattern"} = \&pattern;
|
|
|
2834 |
next;
|
|
|
2835 |
}
|
|
|
2836 |
# This used to prevent $; from being set. We still recognize it,
|
|
|
2837 |
# but we won't do anything.
|
|
|
2838 |
if ($entry eq 'clean') {
|
|
|
2839 |
next;
|
|
|
2840 |
}
|
|
|
2841 |
if ($entry eq 'no_defaults') {
|
|
|
2842 |
$no_defaults ++;
|
|
|
2843 |
next;
|
|
|
2844 |
}
|
|
|
2845 |
if (my $module = $imports {$entry}) {
|
|
|
2846 |
$saw_import ++;
|
|
|
2847 |
eval "require $module;";
|
|
|
2848 |
die $@ if $@;
|
|
|
2849 |
next;
|
|
|
2850 |
}
|
|
|
2851 |
if ($entry =~ /^!(.*)/ && $imports {$1}) {
|
|
|
2852 |
$exclude {$1} ++;
|
|
|
2853 |
next;
|
|
|
2854 |
}
|
|
|
2855 |
# As a last resort, try to load the argument.
|
|
|
2856 |
my $module = $entry =~ /^Regexp::Common/
|
|
|
2857 |
? $entry
|
|
|
2858 |
: "Regexp::Common::" . $entry;
|
|
|
2859 |
eval "require $module;";
|
|
|
2860 |
die $@ if $@;
|
|
|
2861 |
}
|
|
|
2862 |
|
|
|
2863 |
unless ($saw_import || $no_defaults) {
|
|
|
2864 |
foreach my $module (values %imports) {
|
|
|
2865 |
next if $exclude {$module};
|
|
|
2866 |
eval "require $module;";
|
|
|
2867 |
die $@ if $@;
|
|
|
2868 |
}
|
|
|
2869 |
}
|
|
|
2870 |
|
|
|
2871 |
my %exported;
|
|
|
2872 |
foreach my $entry (grep {/^RE_/} @_) {
|
|
|
2873 |
if ($entry =~ /^RE_(\w+_)?ALL$/) {
|
|
|
2874 |
my $m = defined $1 ? $1 : "";
|
|
|
2875 |
my $re = qr /^RE_${m}.*$/;
|
|
|
2876 |
while (my ($sub, $interface) = each %sub_interface) {
|
|
|
2877 |
next if $exported {$sub};
|
|
|
2878 |
next unless $sub =~ /$re/;
|
|
|
2879 |
{
|
|
|
2880 |
no strict 'refs';
|
|
|
2881 |
*{caller() . "::$sub"} = $interface;
|
|
|
2882 |
}
|
|
|
2883 |
$exported {$sub} ++;
|
|
|
2884 |
}
|
|
|
2885 |
}
|
|
|
2886 |
else {
|
|
|
2887 |
next if $exported {$entry};
|
|
|
2888 |
_croak "Can't export unknown subroutine &$entry"
|
|
|
2889 |
unless $sub_interface {$entry};
|
|
|
2890 |
{
|
|
|
2891 |
no strict 'refs';
|
|
|
2892 |
*{caller() . "::$entry"} = $sub_interface {$entry};
|
|
|
2893 |
}
|
|
|
2894 |
$exported {$entry} ++;
|
|
|
2895 |
}
|
|
|
2896 |
}
|
|
|
2897 |
}
|
|
|
2898 |
|
|
|
2899 |
sub AUTOLOAD { _croak "Can't $AUTOLOAD" }
|
|
|
2900 |
|
|
|
2901 |
sub DESTROY {}
|
|
|
2902 |
|
|
|
2903 |
my %cache;
|
|
|
2904 |
|
|
|
2905 |
my $fpat = qr/^(-\w+)/;
|
|
|
2906 |
|
|
|
2907 |
sub _decache {
|
|
|
2908 |
my @args = @{tied %{$_[0]}};
|
|
|
2909 |
my @nonflags = grep {!/$fpat/} @args;
|
|
|
2910 |
my $cache = get_cache(@nonflags);
|
|
|
2911 |
_croak "Can't create unknown regex: \$RE{"
|
|
|
2912 |
. join("}{",@args) . "}"
|
|
|
2913 |
unless exists $cache->{__VAL__};
|
|
|
2914 |
_croak "Perl $] does not support the pattern "
|
|
|
2915 |
. "\$RE{" . join("}{",@args)
|
|
|
2916 |
. "}.\nYou need Perl $cache->{__VAL__}{version} or later"
|
|
|
2917 |
unless ($cache->{__VAL__}{version}||0) <= $];
|
|
|
2918 |
my %flags = ( %{$cache->{__VAL__}{default}},
|
|
|
2919 |
map { /$fpat\Q$;\E(.*)/ ? ($1 => $2)
|
|
|
2920 |
: /$fpat/ ? ($1 => undef)
|
|
|
2921 |
: ()
|
|
|
2922 |
} @args);
|
|
|
2923 |
$cache->{__VAL__}->_clone_with(\@args, \%flags);
|
|
|
2924 |
}
|
|
|
2925 |
|
|
|
2926 |
use overload q{""} => \&_decache;
|
|
|
2927 |
|
|
|
2928 |
|
|
|
2929 |
sub get_cache {
|
|
|
2930 |
my $cache = \%cache;
|
|
|
2931 |
foreach (@_) {
|
|
|
2932 |
$cache = $cache->{$_}
|
|
|
2933 |
|| ($cache->{$_} = {});
|
|
|
2934 |
}
|
|
|
2935 |
return $cache;
|
|
|
2936 |
}
|
|
|
2937 |
|
|
|
2938 |
sub croak_version {
|
|
|
2939 |
my ($entry, @args) = @_;
|
|
|
2940 |
}
|
|
|
2941 |
|
|
|
2942 |
sub pattern {
|
|
|
2943 |
my %spec = @_;
|
|
|
2944 |
_croak 'pattern() requires argument: name => [ @list ]'
|
|
|
2945 |
unless $spec{name} && ref $spec{name} eq 'ARRAY';
|
|
|
2946 |
_croak 'pattern() requires argument: create => $sub_ref_or_string'
|
|
|
2947 |
unless $spec{create};
|
|
|
2948 |
|
|
|
2949 |
if (ref $spec{create} ne "CODE") {
|
|
|
2950 |
my $fixed_str = "$spec{create}";
|
|
|
2951 |
$spec{create} = sub { $fixed_str }
|
|
|
2952 |
}
|
|
|
2953 |
|
|
|
2954 |
my @nonflags;
|
|
|
2955 |
my %default;
|
|
|
2956 |
foreach ( @{$spec{name}} ) {
|
|
|
2957 |
if (/$fpat=(.*)/) {
|
|
|
2958 |
$default{$1} = $2;
|
|
|
2959 |
}
|
|
|
2960 |
elsif (/$fpat\s*$/) {
|
|
|
2961 |
$default{$1} = undef;
|
|
|
2962 |
}
|
|
|
2963 |
else {
|
|
|
2964 |
push @nonflags, $_;
|
|
|
2965 |
}
|
|
|
2966 |
}
|
|
|
2967 |
|
|
|
2968 |
my $entry = get_cache(@nonflags);
|
|
|
2969 |
|
|
|
2970 |
if ($entry->{__VAL__}) {
|
|
|
2971 |
_carp "Overriding \$RE{"
|
|
|
2972 |
. join("}{",@nonflags)
|
|
|
2973 |
. "}";
|
|
|
2974 |
}
|
|
|
2975 |
|
|
|
2976 |
$entry->{__VAL__} = bless {
|
|
|
2977 |
create => $spec{create},
|
|
|
2978 |
match => $spec{match} || \&generic_match,
|
|
|
2979 |
subs => $spec{subs} || \&generic_subs,
|
|
|
2980 |
version => $spec{version},
|
|
|
2981 |
default => \%default,
|
|
|
2982 |
}, 'Regexp::Common::Entry';
|
|
|
2983 |
|
|
|
2984 |
foreach (@nonflags) {s/\W/X/g}
|
|
|
2985 |
my $subname = "RE_" . join ("_", @nonflags);
|
|
|
2986 |
$sub_interface{$subname} = sub {
|
|
|
2987 |
push @_ => undef if @_ % 2;
|
|
|
2988 |
my %flags = @_;
|
|
|
2989 |
my $pat = $spec{create}->($entry->{__VAL__},
|
|
|
2990 |
{%default, %flags}, \@nonflags);
|
|
|
2991 |
if (exists $flags{-keep}) { $pat =~ s/\Q(?k:/(/g; }
|
|
|
2992 |
else { $pat =~ s/\Q(?k:/(?:/g; }
|
|
|
2993 |
return exists $flags {-i} ? qr /(?i:$pat)/ : qr/$pat/;
|
|
|
2994 |
};
|
|
|
2995 |
|
|
|
2996 |
return 1;
|
|
|
2997 |
}
|
|
|
2998 |
|
|
|
2999 |
sub generic_match {$_ [1] =~ /$_[0]/}
|
|
|
3000 |
sub generic_subs {$_ [1] =~ s/$_[0]/$_[2]/}
|
|
|
3001 |
|
|
|
3002 |
sub matches {
|
|
|
3003 |
my ($self, $str) = @_;
|
|
|
3004 |
my $entry = $self -> _decache;
|
|
|
3005 |
$entry -> {match} -> ($entry, $str);
|
|
|
3006 |
}
|
|
|
3007 |
|
|
|
3008 |
sub subs {
|
|
|
3009 |
my ($self, $str, $newstr) = @_;
|
|
|
3010 |
my $entry = $self -> _decache;
|
|
|
3011 |
$entry -> {subs} -> ($entry, $str, $newstr);
|
|
|
3012 |
return $str;
|
|
|
3013 |
}
|
|
|
3014 |
|
|
|
3015 |
|
|
|
3016 |
package Regexp::Common::Entry;
|
|
|
3017 |
# use Carp;
|
|
|
3018 |
|
|
|
3019 |
local $^W = 1;
|
|
|
3020 |
|
|
|
3021 |
use overload
|
|
|
3022 |
q{""} => sub {
|
|
|
3023 |
my ($self) = @_;
|
|
|
3024 |
my $pat = $self->{create}->($self, $self->{flags}, $self->{args});
|
|
|
3025 |
if (exists $self->{flags}{-keep}) {
|
|
|
3026 |
$pat =~ s/\Q(?k:/(/g;
|
|
|
3027 |
}
|
|
|
3028 |
else {
|
|
|
3029 |
$pat =~ s/\Q(?k:/(?:/g;
|
|
|
3030 |
}
|
|
|
3031 |
if (exists $self->{flags}{-i}) { $pat = "(?i)$pat" }
|
|
|
3032 |
return $pat;
|
|
|
3033 |
};
|
|
|
3034 |
|
|
|
3035 |
sub _clone_with {
|
|
|
3036 |
my ($self, $args, $flags) = @_;
|
|
|
3037 |
bless { %$self, args=>$args, flags=>$flags }, ref $self;
|
|
|
3038 |
}
|
|
|
3039 |
#
|
|
|
3040 |
# Copyright (c) 2001 - 2005, Damian Conway and Abigail. All Rights
|
|
|
3041 |
# Reserved. This module is free software. It may be used, redistributed
|
|
|
3042 |
# and/or modified under the terms of the Perl Artistic License
|
|
|
3043 |
# (see http://www.perl.com/perl/misc/Artistic.html)
|
|
|
3044 |
EOCommon
|
|
|
3045 |
# 2}}}
|
|
|
3046 |
$Regexp_Common_Contents{'Common/comment'} = <<'EOC'; # {{{2
|
|
|
3047 |
# $Id: comment.pm,v 2.116 2005/03/16 00:00:02 abigail Exp $
|
|
|
3048 |
|
|
|
3049 |
package Regexp::Common::comment;
|
|
|
3050 |
|
|
|
3051 |
use strict;
|
|
|
3052 |
local $^W = 1;
|
|
|
3053 |
|
|
|
3054 |
use Regexp::Common qw /pattern clean no_defaults/;
|
|
|
3055 |
use vars qw /$VERSION/;
|
|
|
3056 |
|
|
|
3057 |
($VERSION) = q $Revision: 2.116 $ =~ /[\d.]+/g;
|
|
|
3058 |
|
|
|
3059 |
my @generic = (
|
|
|
3060 |
{languages => [qw /ABC Forth/],
|
|
|
3061 |
to_eol => ['\\\\']}, # This is for just a *single* backslash.
|
|
|
3062 |
|
|
|
3063 |
{languages => [qw /Ada Alan Eiffel lua/],
|
|
|
3064 |
to_eol => ['--']},
|
|
|
3065 |
|
|
|
3066 |
{languages => [qw /Advisor/],
|
|
|
3067 |
to_eol => ['#|//']},
|
|
|
3068 |
|
|
|
3069 |
{languages => [qw /Advsys CQL Lisp LOGO M MUMPS REBOL Scheme
|
|
|
3070 |
SMITH zonefile/],
|
|
|
3071 |
to_eol => [';']},
|
|
|
3072 |
|
|
|
3073 |
{languages => ['Algol 60'],
|
|
|
3074 |
from_to => [[qw /comment ;/]]},
|
|
|
3075 |
|
|
|
3076 |
{languages => [qw {ALPACA B C C-- LPC PL/I}],
|
|
|
3077 |
from_to => [[qw {/* */}]]},
|
|
|
3078 |
|
|
|
3079 |
{languages => [qw /awk fvwm2 Icon mutt Perl Python QML R Ruby shell Tcl/],
|
|
|
3080 |
to_eol => ['#']},
|
|
|
3081 |
|
|
|
3082 |
{languages => [[BASIC => 'mvEnterprise']],
|
|
|
3083 |
to_eol => ['[*!]|REM']},
|
|
|
3084 |
|
|
|
3085 |
{languages => [qw /Befunge-98 Funge-98 Shelta/],
|
|
|
3086 |
id => [';']},
|
|
|
3087 |
|
|
|
3088 |
{languages => ['beta-Juliet', 'Crystal Report', 'Portia'],
|
|
|
3089 |
to_eol => ['//']},
|
|
|
3090 |
|
|
|
3091 |
{languages => ['BML'],
|
|
|
3092 |
from_to => [['<?_c', '_c?>']],
|
|
|
3093 |
},
|
|
|
3094 |
|
|
|
3095 |
{languages => [qw /C++/, 'C#', qw /Cg ECMAScript FPL Java JavaScript/],
|
|
|
3096 |
to_eol => ['//'],
|
|
|
3097 |
from_to => [[qw {/* */}]]},
|
|
|
3098 |
|
|
|
3099 |
{languages => [qw /CLU LaTeX slrn TeX/],
|
|
|
3100 |
to_eol => ['%']},
|
|
|
3101 |
|
|
|
3102 |
{languages => [qw /False/],
|
|
|
3103 |
from_to => [[qw !{ }!]]},
|
|
|
3104 |
|
|
|
3105 |
{languages => [qw /Fortran/],
|
|
|
3106 |
to_eol => ['!']},
|
|
|
3107 |
|
|
|
3108 |
{languages => [qw /Haifu/],
|
|
|
3109 |
id => [',']},
|
|
|
3110 |
|
|
|
3111 |
{languages => [qw /ILLGOL/],
|
|
|
3112 |
to_eol => ['NB']},
|
|
|
3113 |
|
|
|
3114 |
{languages => [qw /INTERCAL/],
|
|
|
3115 |
to_eol => [q{(?:(?:PLEASE(?:\s+DO)?|DO)\s+)?(?:NOT|N'T)}]},
|
|
|
3116 |
|
|
|
3117 |
{languages => [qw /J/],
|
|
|
3118 |
to_eol => ['NB[.]']},
|
|
|
3119 |
|
|
|
3120 |
{languages => [qw /Nickle/],
|
|
|
3121 |
to_eol => ['#'],
|
|
|
3122 |
from_to => [[qw {/* */}]]},
|
|
|
3123 |
|
|
|
3124 |
{languages => [qw /Oberon/],
|
|
|
3125 |
from_to => [[qw /(* *)/]]},
|
|
|
3126 |
|
|
|
3127 |
{languages => [[qw /Pascal Delphi/], [qw /Pascal Free/], [qw /Pascal GPC/]],
|
|
|
3128 |
to_eol => ['//'],
|
|
|
3129 |
from_to => [[qw !{ }!], [qw !(* *)!]]},
|
|
|
3130 |
|
|
|
3131 |
{languages => [[qw /Pascal Workshop/]],
|
|
|
3132 |
id => [qw /"/],
|
|
|
3133 |
from_to => [[qw !{ }!], [qw !(* *)!], [qw !/* */!]]},
|
|
|
3134 |
|
|
|
3135 |
{languages => [qw /PEARL/],
|
|
|
3136 |
to_eol => ['!'],
|
|
|
3137 |
from_to => [[qw {/* */}]]},
|
|
|
3138 |
|
|
|
3139 |
{languages => [qw /PHP/],
|
|
|
3140 |
to_eol => ['#', '//'],
|
|
|
3141 |
from_to => [[qw {/* */}]]},
|
|
|
3142 |
|
|
|
3143 |
{languages => [qw !PL/B!],
|
|
|
3144 |
to_eol => ['[.;]']},
|
|
|
3145 |
|
|
|
3146 |
{languages => [qw !PL/SQL!],
|
|
|
3147 |
to_eol => ['--'],
|
|
|
3148 |
from_to => [[qw {/* */}]]},
|
|
|
3149 |
|
|
|
3150 |
{languages => [qw /Q-BAL/],
|
|
|
3151 |
to_eol => ['`']},
|
|
|
3152 |
|
|
|
3153 |
{languages => [qw /Smalltalk/],
|
|
|
3154 |
id => ['"']},
|
|
|
3155 |
|
|
|
3156 |
{languages => [qw /SQL/],
|
|
|
3157 |
to_eol => ['-{2,}']},
|
|
|
3158 |
|
|
|
3159 |
{languages => [qw /troff/],
|
|
|
3160 |
to_eol => ['\\\"']},
|
|
|
3161 |
|
|
|
3162 |
{languages => [qw /vi/],
|
|
|
3163 |
to_eol => ['"']},
|
|
|
3164 |
|
|
|
3165 |
{languages => [qw /*W/],
|
|
|
3166 |
from_to => [[qw {|| !!}]]},
|
|
|
3167 |
);
|
|
|
3168 |
|
|
|
3169 |
my @plain_or_nested = (
|
|
|
3170 |
[Caml => undef, "(*" => "*)"],
|
|
|
3171 |
[Dylan => "//", "/*" => "*/"],
|
|
|
3172 |
[Haskell => "-{2,}", "{-" => "-}"],
|
|
|
3173 |
[Hugo => "!(?!\\\\)", "!\\" => "\\!"],
|
|
|
3174 |
[SLIDE => "#", "(*" => "*)"],
|
|
|
3175 |
);
|
|
|
3176 |
|
|
|
3177 |
#
|
|
|
3178 |
# Helper subs.
|
|
|
3179 |
#
|
|
|
3180 |
|
|
|
3181 |
sub combine {
|
|
|
3182 |
local $_ = join "|", @_;
|
|
|
3183 |
if (@_ > 1) {
|
|
|
3184 |
s/\(\?k:/(?:/g;
|
|
|
3185 |
$_ = "(?k:$_)";
|
|
|
3186 |
}
|
|
|
3187 |
$_
|
|
|
3188 |
}
|
|
|
3189 |
|
|
|
3190 |
sub to_eol ($) {"(?k:(?k:$_[0])(?k:[^\\n]*)(?k:\\n))"}
|
|
|
3191 |
sub id ($) {"(?k:(?k:$_[0])(?k:[^$_[0]]*)(?k:$_[0]))"} # One char only!
|
|
|
3192 |
sub from_to {
|
|
|
3193 |
local $^W = 1;
|
|
|
3194 |
my ($begin, $end) = @_;
|
|
|
3195 |
|
|
|
3196 |
my $qb = quotemeta $begin;
|
|
|
3197 |
my $qe = quotemeta $end;
|
|
|
3198 |
my $fe = quotemeta substr $end => 0, 1;
|
|
|
3199 |
my $te = quotemeta substr $end => 1;
|
|
|
3200 |
|
|
|
3201 |
"(?k:(?k:$qb)(?k:(?:[^$fe]+|$fe(?!$te))*)(?k:$qe))";
|
|
|
3202 |
}
|
|
|
3203 |
|
|
|
3204 |
|
|
|
3205 |
my $count = 0;
|
|
|
3206 |
sub nested {
|
|
|
3207 |
local $^W = 1;
|
|
|
3208 |
my ($begin, $end) = @_;
|
|
|
3209 |
|
|
|
3210 |
$count ++;
|
|
|
3211 |
my $r = '(??{$Regexp::Common::comment ['. $count . ']})';
|
|
|
3212 |
|
|
|
3213 |
my $qb = quotemeta $begin;
|
|
|
3214 |
my $qe = quotemeta $end;
|
|
|
3215 |
my $fb = quotemeta substr $begin => 0, 1;
|
|
|
3216 |
my $fe = quotemeta substr $end => 0, 1;
|
|
|
3217 |
|
|
|
3218 |
my $tb = quotemeta substr $begin => 1;
|
|
|
3219 |
my $te = quotemeta substr $end => 1;
|
|
|
3220 |
|
|
|
3221 |
use re 'eval';
|
|
|
3222 |
|
|
|
3223 |
my $re;
|
|
|
3224 |
if ($fb eq $fe) {
|
|
|
3225 |
$re = qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
|
|
|
3226 |
}
|
|
|
3227 |
else {
|
|
|
3228 |
local $" = "|";
|
|
|
3229 |
my @clauses = "(?>[^$fb$fe]+)";
|
|
|
3230 |
push @clauses => "$fb(?!$tb)" if length $tb;
|
|
|
3231 |
push @clauses => "$fe(?!$te)" if length $te;
|
|
|
3232 |
push @clauses => $r;
|
|
|
3233 |
$re = qr /(?:$qb(?:@clauses)*$qe)/;
|
|
|
3234 |
}
|
|
|
3235 |
|
|
|
3236 |
$Regexp::Common::comment [$count] = qr/$re/;
|
|
|
3237 |
}
|
|
|
3238 |
|
|
|
3239 |
#
|
|
|
3240 |
# Process data.
|
|
|
3241 |
#
|
|
|
3242 |
|
|
|
3243 |
foreach my $info (@plain_or_nested) {
|
|
|
3244 |
my ($language, $mark, $begin, $end) = @$info;
|
|
|
3245 |
pattern name => [comment => $language],
|
|
|
3246 |
create =>
|
|
|
3247 |
sub {my $re = nested $begin => $end;
|
|
|
3248 |
my $prefix = defined $mark ? $mark . "[^\n]*\n|" : "";
|
|
|
3249 |
exists $_ [1] -> {-keep} ? qr /($prefix$re)/
|
|
|
3250 |
: qr /$prefix$re/
|
|
|
3251 |
},
|
|
|
3252 |
version => 5.006,
|
|
|
3253 |
;
|
|
|
3254 |
}
|
|
|
3255 |
|
|
|
3256 |
|
|
|
3257 |
foreach my $group (@generic) {
|
|
|
3258 |
my $pattern = combine +(map {to_eol $_} @{$group -> {to_eol}}),
|
|
|
3259 |
(map {from_to @$_} @{$group -> {from_to}}),
|
|
|
3260 |
(map {id $_} @{$group -> {id}}),
|
|
|
3261 |
;
|
|
|
3262 |
foreach my $language (@{$group -> {languages}}) {
|
|
|
3263 |
pattern name => [comment => ref $language ? @$language : $language],
|
|
|
3264 |
create => $pattern,
|
|
|
3265 |
;
|
|
|
3266 |
}
|
|
|
3267 |
}
|
|
|
3268 |
|
|
|
3269 |
|
|
|
3270 |
|
|
|
3271 |
#
|
|
|
3272 |
# Other languages.
|
|
|
3273 |
#
|
|
|
3274 |
|
|
|
3275 |
# http://www.pascal-central.com/docs/iso10206.txt
|
|
|
3276 |
pattern name => [qw /comment Pascal/],
|
|
|
3277 |
create => '(?k:' . '(?k:[{]|[(][*])'
|
|
|
3278 |
. '(?k:[^}*]*(?:[*][^)][^}*]*)*)'
|
|
|
3279 |
. '(?k:[}]|[*][)])'
|
|
|
3280 |
. ')'
|
|
|
3281 |
;
|
|
|
3282 |
|
|
|
3283 |
# http://www.templetons.com/brad/alice/language/
|
|
|
3284 |
pattern name => [qw /comment Pascal Alice/],
|
|
|
3285 |
create => '(?k:(?k:[{])(?k:[^}\n]*)(?k:[}]))'
|
|
|
3286 |
;
|
|
|
3287 |
|
|
|
3288 |
|
|
|
3289 |
# http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt
|
|
|
3290 |
pattern name => [qw (comment), 'Algol 68'],
|
|
|
3291 |
create => q {(?k:(?:#[^#]*#)|} .
|
|
|
3292 |
q {(?:\bco\b(?:[^c]+|\Bc|\bc(?!o\b))*\bco\b)|} .
|
|
|
3293 |
q {(?:\bcomment\b(?:[^c]+|\Bc|\bc(?!omment\b))*\bcomment\b))}
|
|
|
3294 |
;
|
|
|
3295 |
|
|
|
3296 |
|
|
|
3297 |
# See rules 91 and 92 of ISO 8879 (SGML).
|
|
|
3298 |
# Charles F. Goldfarb: "The SGML Handbook".
|
|
|
3299 |
# Oxford: Oxford University Press. 1990. ISBN 0-19-853737-9.
|
|
|
3300 |
# Ch. 10.3, pp 390.
|
|
|
3301 |
pattern name => [qw (comment HTML)],
|
|
|
3302 |
create => q {(?k:(?k:<!)(?k:(?:--(?k:[^-]*(?:-[^-]+)*)--\s*)*)(?k:>))},
|
|
|
3303 |
;
|
|
|
3304 |
|
|
|
3305 |
|
|
|
3306 |
pattern name => [qw /comment SQL MySQL/],
|
|
|
3307 |
create => q {(?k:(?:#|-- )[^\n]*\n|} .
|
|
|
3308 |
q {/\*(?:(?>[^*;"']+)|"[^"]*"|'[^']*'|\*(?!/))*(?:;|\*/))},
|
|
|
3309 |
;
|
|
|
3310 |
|
|
|
3311 |
# Anything that isn't <>[]+-.,
|
|
|
3312 |
# http://home.wxs.nl/~faase009/Ha_BF.html
|
|
|
3313 |
pattern name => [qw /comment Brainfuck/],
|
|
|
3314 |
create => '(?k:[^<>\[\]+\-.,]+)'
|
|
|
3315 |
;
|
|
|
3316 |
|
|
|
3317 |
# Squeak is a variant of Smalltalk-80.
|
|
|
3318 |
# http://www.squeak.
|
|
|
3319 |
# http://mucow.com/squeak-qref.html
|
|
|
3320 |
pattern name => [qw /comment Squeak/],
|
|
|
3321 |
create => '(?k:(?k:")(?k:[^"]*(?:""[^"]*)*)(?k:"))'
|
|
|
3322 |
;
|
|
|
3323 |
|
|
|
3324 |
#
|
|
|
3325 |
# Scores of less than 5 or above 17....
|
|
|
3326 |
# http://www.cliff.biffle.org/esoterica/beatnik.html
|
|
|
3327 |
@Regexp::Common::comment::scores = (1, 3, 3, 2, 1, 4, 2, 4, 1, 8,
|
|
|
3328 |
5, 1, 3, 1, 1, 3, 10, 1, 1, 1,
|
|
|
3329 |
1, 4, 4, 8, 4, 10);
|
|
|
3330 |
pattern name => [qw /comment Beatnik/],
|
|
|
3331 |
create => sub {
|
|
|
3332 |
use re 'eval';
|
|
|
3333 |
my ($s, $x);
|
|
|
3334 |
my $re = qr {\b([A-Za-z]+)\b
|
|
|
3335 |
(?(?{($s, $x) = (0, lc $^N);
|
|
|
3336 |
$s += $Regexp::Common::comment::scores
|
|
|
3337 |
[ord (chop $x) - ord ('a')] while length $x;
|
|
|
3338 |
$s >= 5 && $s < 18})XXX|)}x;
|
|
|
3339 |
$re;
|
|
|
3340 |
},
|
|
|
3341 |
version => 5.008,
|
|
|
3342 |
;
|
|
|
3343 |
|
|
|
3344 |
|
|
|
3345 |
# http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/
|
|
|
3346 |
# (Goto table of contents/3.3 Source Form)
|
|
|
3347 |
# Fortran, in fixed format. Comments start with a C, c or * in the first
|
|
|
3348 |
# column, or a ! anywhere, but the sixth column. Then end with a newline.
|
|
|
3349 |
pattern name => [qw /comment Fortran fixed/],
|
|
|
3350 |
create => '(?k:(?k:(?:^[Cc*]|(?<!^.....)!))(?k:[^\n]*)(?k:\n))'
|
|
|
3351 |
;
|
|
|
3352 |
|
|
|
3353 |
|
|
|
3354 |
# http://www.csis.ul.ie/cobol/Course/COBOLIntro.htm
|
|
|
3355 |
# Traditionally, comments in COBOL were indicated with an asteriks in
|
|
|
3356 |
# the seventh column. Modern compilers may be more lenient.
|
|
|
3357 |
pattern name => [qw /comment COBOL/],
|
|
|
3358 |
create => '(?<=^......)(?k:(?k:[*])(?k:[^\n]*)(?k:\n))',
|
|
|
3359 |
version => '5.008',
|
|
|
3360 |
;
|
|
|
3361 |
|
|
|
3362 |
1;
|
|
|
3363 |
#
|
|
|
3364 |
# Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
|
|
|
3365 |
# This module is free software. It may be used, redistributed
|
|
|
3366 |
# and/or modified under the terms of the Perl Artistic License
|
|
|
3367 |
# (see http://www.perl.com/perl/misc/Artistic.html)
|
|
|
3368 |
EOC
|
|
|
3369 |
# 2}}}
|
|
|
3370 |
$Regexp_Common_Contents{'Common/balanced'} = <<'EOB'; # {{{2
|
|
|
3371 |
package Regexp::Common::balanced; {
|
|
|
3372 |
|
|
|
3373 |
use strict;
|
|
|
3374 |
local $^W = 1;
|
|
|
3375 |
|
|
|
3376 |
use vars qw /$VERSION/;
|
|
|
3377 |
($VERSION) = q $Revision: 2.101 $ =~ /[\d.]+/g;
|
|
|
3378 |
|
|
|
3379 |
use Regexp::Common qw /pattern clean no_defaults/;
|
|
|
3380 |
|
|
|
3381 |
my %closer = ( '{'=>'}', '('=>')', '['=>']', '<'=>'>' );
|
|
|
3382 |
my $count = -1;
|
|
|
3383 |
my %cache;
|
|
|
3384 |
|
|
|
3385 |
sub nested {
|
|
|
3386 |
local $^W = 1;
|
|
|
3387 |
my ($start, $finish) = @_;
|
|
|
3388 |
|
|
|
3389 |
return $Regexp::Common::balanced [$cache {$start} {$finish}]
|
|
|
3390 |
if exists $cache {$start} {$finish};
|
|
|
3391 |
|
|
|
3392 |
$count ++;
|
|
|
3393 |
my $r = '(??{$Regexp::Common::balanced ['. $count . ']})';
|
|
|
3394 |
|
|
|
3395 |
my @starts = map {s/\\(.)/$1/g; $_} grep {length}
|
|
|
3396 |
$start =~ /([^|\\]+|\\.)+/gs;
|
|
|
3397 |
my @finishes = map {s/\\(.)/$1/g; $_} grep {length}
|
|
|
3398 |
$finish =~ /([^|\\]+|\\.)+/gs;
|
|
|
3399 |
|
|
|
3400 |
push @finishes => ($finishes [-1]) x (@starts - @finishes);
|
|
|
3401 |
|
|
|
3402 |
my @re;
|
|
|
3403 |
local $" = "|";
|
|
|
3404 |
foreach my $begin (@starts) {
|
|
|
3405 |
my $end = shift @finishes;
|
|
|
3406 |
|
|
|
3407 |
my $qb = quotemeta $begin;
|
|
|
3408 |
my $qe = quotemeta $end;
|
|
|
3409 |
my $fb = quotemeta substr $begin => 0, 1;
|
|
|
3410 |
my $fe = quotemeta substr $end => 0, 1;
|
|
|
3411 |
|
|
|
3412 |
my $tb = quotemeta substr $begin => 1;
|
|
|
3413 |
my $te = quotemeta substr $end => 1;
|
|
|
3414 |
|
|
|
3415 |
use re 'eval';
|
|
|
3416 |
|
|
|
3417 |
my $add;
|
|
|
3418 |
if ($fb eq $fe) {
|
|
|
3419 |
push @re =>
|
|
|
3420 |
qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
|
|
|
3421 |
}
|
|
|
3422 |
else {
|
|
|
3423 |
my @clauses = "(?>[^$fb$fe]+)";
|
|
|
3424 |
push @clauses => "$fb(?!$tb)" if length $tb;
|
|
|
3425 |
push @clauses => "$fe(?!$te)" if length $te;
|
|
|
3426 |
push @clauses => $r;
|
|
|
3427 |
push @re => qr /(?:$qb(?:@clauses)*$qe)/;
|
|
|
3428 |
}
|
|
|
3429 |
}
|
|
|
3430 |
|
|
|
3431 |
$cache {$start} {$finish} = $count;
|
|
|
3432 |
$Regexp::Common::balanced [$count] = qr/@re/;
|
|
|
3433 |
}
|
|
|
3434 |
|
|
|
3435 |
|
|
|
3436 |
pattern name => [qw /balanced -parens=() -begin= -end=/],
|
|
|
3437 |
create => sub {
|
|
|
3438 |
my $flag = $_[1];
|
|
|
3439 |
unless (defined $flag -> {-begin} && length $flag -> {-begin} &&
|
|
|
3440 |
defined $flag -> {-end} && length $flag -> {-end}) {
|
|
|
3441 |
my @open = grep {index ($flag->{-parens}, $_) >= 0}
|
|
|
3442 |
('[','(','{','<');
|
|
|
3443 |
my @close = map {$closer {$_}} @open;
|
|
|
3444 |
$flag -> {-begin} = join "|" => @open;
|
|
|
3445 |
$flag -> {-end} = join "|" => @close;
|
|
|
3446 |
}
|
|
|
3447 |
my $pat = nested @$flag {qw /-begin -end/};
|
|
|
3448 |
return exists $flag -> {-keep} ? qr /($pat)/ : $pat;
|
|
|
3449 |
},
|
|
|
3450 |
version => 5.006,
|
|
|
3451 |
;
|
|
|
3452 |
|
|
|
3453 |
}
|
|
|
3454 |
|
|
|
3455 |
1;
|
|
|
3456 |
#
|
|
|
3457 |
# Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
|
|
|
3458 |
# This module is free software. It may be used, redistributed
|
|
|
3459 |
# and/or modified under the terms of the Perl Artistic License
|
|
|
3460 |
# (see http://www.perl.com/perl/misc/Artistic.html)
|
|
|
3461 |
EOB
|
|
|
3462 |
# 2}}}
|
|
|
3463 |
$Regexp_Common_Contents{'Common/delimited'} = <<'EOD'; # {{{2
|
|
|
3464 |
# $Id: delimited.pm,v 2.104 2005/03/16 00:22:45 abigail Exp $
|
|
|
3465 |
|
|
|
3466 |
package Regexp::Common::delimited;
|
|
|
3467 |
|
|
|
3468 |
use strict;
|
|
|
3469 |
local $^W = 1;
|
|
|
3470 |
|
|
|
3471 |
use Regexp::Common qw /pattern clean no_defaults/;
|
|
|
3472 |
use vars qw /$VERSION/;
|
|
|
3473 |
|
|
|
3474 |
($VERSION) = q $Revision: 2.104 $ =~ /[\d.]+/g;
|
|
|
3475 |
|
|
|
3476 |
sub gen_delimited {
|
|
|
3477 |
|
|
|
3478 |
my ($dels, $escs) = @_;
|
|
|
3479 |
# return '(?:\S*)' unless $dels =~ /\S/;
|
|
|
3480 |
if (length $escs) {
|
|
|
3481 |
$escs .= substr ($escs, -1) x (length ($dels) - length ($escs));
|
|
|
3482 |
}
|
|
|
3483 |
my @pat = ();
|
|
|
3484 |
my $i;
|
|
|
3485 |
for ($i=0; $i < length $dels; $i++) {
|
|
|
3486 |
my $del = quotemeta substr ($dels, $i, 1);
|
|
|
3487 |
my $esc = length($escs) ? quotemeta substr ($escs, $i, 1) : "";
|
|
|
3488 |
if ($del eq $esc) {
|
|
|
3489 |
push @pat,
|
|
|
3490 |
"(?k:$del)(?k:[^$del]*(?:(?:$del$del)[^$del]*)*)(?k:$del)";
|
|
|
3491 |
}
|
|
|
3492 |
elsif (length $esc) {
|
|
|
3493 |
push @pat,
|
|
|
3494 |
"(?k:$del)(?k:[^$esc$del]*(?:$esc.[^$esc$del]*)*)(?k:$del)";
|
|
|
3495 |
}
|
|
|
3496 |
else {
|
|
|
3497 |
push @pat, "(?k:$del)(?k:[^$del]*)(?k:$del)";
|
|
|
3498 |
}
|
|
|
3499 |
}
|
|
|
3500 |
my $pat = join '|', @pat;
|
|
|
3501 |
return "(?k:$pat)";
|
|
|
3502 |
}
|
|
|
3503 |
|
|
|
3504 |
sub _croak {
|
|
|
3505 |
require Carp;
|
|
|
3506 |
goto &Carp::croak;
|
|
|
3507 |
}
|
|
|
3508 |
|
|
|
3509 |
pattern name => [qw( delimited -delim= -esc=\\ )],
|
|
|
3510 |
create => sub {my $flags = $_[1];
|
|
|
3511 |
_croak 'Must specify delimiter in $RE{delimited}'
|
|
|
3512 |
unless length $flags->{-delim};
|
|
|
3513 |
return gen_delimited (@{$flags}{-delim, -esc});
|
|
|
3514 |
},
|
|
|
3515 |
;
|
|
|
3516 |
|
|
|
3517 |
pattern name => [qw( quoted -esc=\\ )],
|
|
|
3518 |
create => sub {my $flags = $_[1];
|
|
|
3519 |
return gen_delimited (q{"'`}, $flags -> {-esc});
|
|
|
3520 |
},
|
|
|
3521 |
;
|
|
|
3522 |
|
|
|
3523 |
|
|
|
3524 |
1;
|
|
|
3525 |
#
|
|
|
3526 |
# Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved.
|
|
|
3527 |
# This module is free software. It may be used, redistributed
|
|
|
3528 |
# and/or modified under the terms of the Perl Artistic License
|
|
|
3529 |
# (see http://www.perl.com/perl/misc/Artistic.html)
|
|
|
3530 |
EOD
|
|
|
3531 |
# 2}}}
|
|
|
3532 |
my $problems = 0;
|
|
|
3533 |
$HAVE_Rexexp_Common = 0;
|
|
|
3534 |
my $dir = "";
|
|
|
3535 |
if ($opt_sdir) {
|
|
|
3536 |
# write to the user-defined scratch directory
|
|
|
3537 |
$dir = $opt_sdir;
|
|
|
3538 |
} else {
|
|
|
3539 |
# let File::Temp create a suitable temporary directory
|
|
|
3540 |
$dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
|
|
|
3541 |
}
|
|
|
3542 |
print "Using temp dir [$dir] to install Regexp::Common\n" if $opt_v;
|
|
|
3543 |
my $Regexp_dir = "$dir/Regexp";
|
|
|
3544 |
my $Regexp_Common_dir = "$dir/Regexp/Common";
|
|
|
3545 |
mkdir $Regexp_dir ;
|
|
|
3546 |
mkdir $Regexp_Common_dir;
|
|
|
3547 |
|
|
|
3548 |
foreach my $module_file (keys %Regexp_Common_Contents) {
|
|
|
3549 |
my $OUT = new IO::File "$dir/Regexp/${module_file}.pm", "w";
|
|
|
3550 |
if (defined $OUT) {
|
|
|
3551 |
print $OUT $Regexp_Common_Contents{$module_file};
|
|
|
3552 |
$OUT->close;
|
|
|
3553 |
} else {
|
|
|
3554 |
warn "Failed to install Regexp::${module_file}.pm\n";
|
|
|
3555 |
$problems = 1;
|
|
|
3556 |
}
|
|
|
3557 |
}
|
|
|
3558 |
|
|
|
3559 |
push @INC, $dir;
|
|
|
3560 |
eval "use Regexp::Common qw /comment RE_comment_HTML balanced/";
|
|
|
3561 |
$HAVE_Rexexp_Common = 1 unless $problems;
|
|
|
3562 |
} # 1}}}
|
|
|
3563 |
sub call_regexp_common { # {{{1
|
|
|
3564 |
my ($ra_lines, $language ) = @_;
|
|
|
3565 |
print "-> call_regexp_common\n" if $opt_v > 2;
|
|
|
3566 |
|
|
|
3567 |
Install_Regexp_Common() unless $HAVE_Rexexp_Common;
|
|
|
3568 |
|
|
|
3569 |
my $all_lines = join("", @{$ra_lines});
|
|
|
3570 |
|
|
|
3571 |
no strict 'vars';
|
|
|
3572 |
# otherwise get:
|
|
|
3573 |
# Global symbol "%RE" requires explicit package name at cloc line xx.
|
|
|
3574 |
if ($all_lines =~ $RE{comment}{$language}) {
|
|
|
3575 |
# Suppress "Use of uninitialized value in regexp compilation" that
|
|
|
3576 |
# pops up when $1 is undefined--happens if there's a bug in the $RE
|
|
|
3577 |
# This Pascal comment will trigger it:
|
|
|
3578 |
# (* This is { another } test. **)
|
|
|
3579 |
# Curiously, testing for "defined $1" breaks the substitution.
|
|
|
3580 |
no warnings;
|
|
|
3581 |
|
|
|
3582 |
$all_lines =~ s/$1//g;
|
|
|
3583 |
}
|
|
|
3584 |
# a bogus use of %RE to avoid:
|
|
|
3585 |
# Name "main::RE" used only once: possible typo at cloc line xx.
|
|
|
3586 |
print scalar keys %RE if $opt_v < -20;
|
|
|
3587 |
|
|
|
3588 |
print "<- call_regexp_common\n" if $opt_v > 2;
|
|
|
3589 |
return split("\n", $all_lines);
|
|
|
3590 |
} # 1}}}
|
|
|
3591 |
sub plural_form { # {{{1
|
|
|
3592 |
# For getting the right plural form on some English nouns.
|
|
|
3593 |
my $n = shift @_;
|
|
|
3594 |
if ($n == 1) { return ( 1, "" ); }
|
|
|
3595 |
else { return ($n, "s"); }
|
|
|
3596 |
} # 1}}}
|
|
|
3597 |
sub matlab_or_objective_C { # {{{1
|
|
|
3598 |
# Decide if code is MATLAB, Objective C, or MUMPS
|
|
|
3599 |
my ($file , # in
|
|
|
3600 |
$rh_Err , # in hash of error codes
|
|
|
3601 |
$raa_errors , # out
|
|
|
3602 |
$rs_language , # out
|
|
|
3603 |
) = @_;
|
|
|
3604 |
|
|
|
3605 |
print "-> matlab_or_objective_C\n" if $opt_v > 2;
|
|
|
3606 |
# matlab markers:
|
|
|
3607 |
# first line starts with "function"
|
|
|
3608 |
# some lines start with "%"
|
|
|
3609 |
# high marks for lines that start with [
|
|
|
3610 |
#
|
|
|
3611 |
# Objective C markers:
|
|
|
3612 |
# must have at least two brace characters, { }
|
|
|
3613 |
# has /* ... */ style comments
|
|
|
3614 |
# some lines start with @
|
|
|
3615 |
# some lines start with #include
|
|
|
3616 |
#
|
|
|
3617 |
# MUMPS:
|
|
|
3618 |
# has ; comment markers
|
|
|
3619 |
# do not match: \w+\s*=\s*\w
|
|
|
3620 |
# lines begin with \s*\.?\w+\s+\w
|
|
|
3621 |
# high marks for lines that start with \s*K\s+ or \s*Kill\s+
|
|
|
3622 |
|
|
|
3623 |
${$rs_language} = "";
|
|
|
3624 |
my $IN = new IO::File $file, "r";
|
|
|
3625 |
if (!defined $IN) {
|
|
|
3626 |
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
|
3627 |
return;
|
|
|
3628 |
}
|
|
|
3629 |
|
|
|
3630 |
my $matlab_points = 0;
|
|
|
3631 |
my $objective_C_points = 0;
|
|
|
3632 |
my $mumps_points = 0;
|
|
|
3633 |
my $has_braces = 0;
|
|
|
3634 |
while (<$IN>) {
|
|
|
3635 |
++$has_braces if m/[{}]/;
|
|
|
3636 |
++$mumps_points if $. == 1 and m{^[A-Z]};
|
|
|
3637 |
if (m{^\s*/\*}) { # /*
|
|
|
3638 |
++$objective_C_points;
|
|
|
3639 |
--$matlab_points;
|
|
|
3640 |
} elsif (m{^\s*\w+\s*=\s*}) { # definitely not MUMPS
|
|
|
3641 |
--$mumps_points;
|
|
|
3642 |
} elsif (m{^\s*\.?\w\s+\w}) {
|
|
|
3643 |
++$mumps_points;
|
|
|
3644 |
} elsif (m{^\s*;}) {
|
|
|
3645 |
++$mumps_points;
|
|
|
3646 |
} elsif (m{^\s*#include}) {
|
|
|
3647 |
++$objective_C_points;
|
|
|
3648 |
--$matlab_points;
|
|
|
3649 |
} elsif (m{^\s*@(interface|implementation|protocol|public|protected|private|end)\s}o) {
|
|
|
3650 |
# Objective C without a doubt
|
|
|
3651 |
$objective_C_points = 1;
|
|
|
3652 |
$matlab_points = 0;
|
|
|
3653 |
last;
|
|
|
3654 |
} elsif (m{^\s*\[}) { # line starts with [ -- very matlab
|
|
|
3655 |
$matlab_points += 5;
|
|
|
3656 |
} elsif (m{^\sK(ill)?\s+}) {
|
|
|
3657 |
$mumps_points += 5;
|
|
|
3658 |
} elsif (m{^\s*function}) {
|
|
|
3659 |
--$objective_C_points;
|
|
|
3660 |
++$matlab_points;
|
|
|
3661 |
} elsif (m{^\s*%}) { # %
|
|
|
3662 |
--$objective_C_points;
|
|
|
3663 |
++$matlab_points;
|
|
|
3664 |
++$mumps_points;
|
|
|
3665 |
}
|
|
|
3666 |
}
|
|
|
3667 |
$IN->close;
|
|
|
3668 |
|
|
|
3669 |
print "<- matlab_or_objective_C(matlab=$matlab_points, C=$objective_C_points, mumps=$mumps_points)\n"
|
|
|
3670 |
if $opt_v > 2;
|
|
|
3671 |
$objective_C_points = -9.9e20 unless $has_braces >= 2;
|
|
|
3672 |
if (($matlab_points > $objective_C_points) and
|
|
|
3673 |
($matlab_points > $mumps_points) ) {
|
|
|
3674 |
${$rs_language} = "MATLAB";
|
|
|
3675 |
} elsif (($mumps_points > $objective_C_points) and
|
|
|
3676 |
($mumps_points > $matlab_points) ) {
|
|
|
3677 |
${$rs_language} = "MUMPS";
|
|
|
3678 |
} else {
|
|
|
3679 |
${$rs_language} = "Objective C";
|
|
|
3680 |
}
|
|
|
3681 |
|
|
|
3682 |
} # 1}}}
|
|
|
3683 |
|
|
|
3684 |
# subroutines copied from SLOCCount
|
|
|
3685 |
my %lex_files = (); # really_is_lex()
|
|
|
3686 |
my %expect_files = (); # really_is_expect()
|
|
|
3687 |
my %pascal_files = (); # really_is_pascal(), really_is_incpascal()
|
|
|
3688 |
my %php_files = (); # really_is_php()
|
|
|
3689 |
sub really_is_lex { # {{{1
|
|
|
3690 |
# Given filename, returns TRUE if its contents really is lex.
|
|
|
3691 |
# lex file must have "%%", "%{", and "%}".
|
|
|
3692 |
# In theory, a lex file doesn't need "%{" and "%}", but in practice
|
|
|
3693 |
# they all have them, and requiring them avoid mislabeling a
|
|
|
3694 |
# non-lexfile as a lex file.
|
|
|
3695 |
|
|
|
3696 |
my $filename = shift;
|
|
|
3697 |
chomp($filename);
|
|
|
3698 |
|
|
|
3699 |
my $is_lex = 0; # Value to determine.
|
|
|
3700 |
my $percent_percent = 0;
|
|
|
3701 |
my $percent_opencurly = 0;
|
|
|
3702 |
my $percent_closecurly = 0;
|
|
|
3703 |
|
|
|
3704 |
# Return cached result, if available:
|
|
|
3705 |
if ($lex_files{$filename}) { return $lex_files{$filename};}
|
|
|
3706 |
|
|
|
3707 |
open(LEX_FILE, "<$filename") ||
|
|
|
3708 |
die "Can't open $filename to determine if it's lex.\n";
|
|
|
3709 |
while(<LEX_FILE>) {
|
|
|
3710 |
$percent_percent++ if (m/^\s*\%\%/);
|
|
|
3711 |
$percent_opencurly++ if (m/^\s*\%\{/);
|
|
|
3712 |
$percent_closecurly++ if (m/^\s*\%\}/);
|
|
|
3713 |
}
|
|
|
3714 |
close(LEX_FILE);
|
|
|
3715 |
|
|
|
3716 |
if ($percent_percent && $percent_opencurly && $percent_closecurly)
|
|
|
3717 |
{$is_lex = 1;}
|
|
|
3718 |
|
|
|
3719 |
$lex_files{$filename} = $is_lex; # Store result in cache.
|
|
|
3720 |
|
|
|
3721 |
return $is_lex;
|
|
|
3722 |
} # 1}}}
|
|
|
3723 |
sub really_is_expect { # {{{1
|
|
|
3724 |
# Given filename, returns TRUE if its contents really are Expect.
|
|
|
3725 |
# Many "exp" files (such as in Apache and Mesa) are just "export" data,
|
|
|
3726 |
# summarizing something else # (e.g., its interface).
|
|
|
3727 |
# Sometimes (like in RPM) it's just misc. data.
|
|
|
3728 |
# Thus, we need to look at the file to determine
|
|
|
3729 |
# if it's really an "expect" file.
|
|
|
3730 |
|
|
|
3731 |
my $filename = shift;
|
|
|
3732 |
chomp($filename);
|
|
|
3733 |
|
|
|
3734 |
# The heuristic is as follows: it's Expect _IF_ it:
|
|
|
3735 |
# 1. has "load_lib" command and either "#" comments or {}.
|
|
|
3736 |
# 2. {, }, and one of: proc, if, [...], expect
|
|
|
3737 |
|
|
|
3738 |
my $is_expect = 0; # Value to determine.
|
|
|
3739 |
|
|
|
3740 |
my $begin_brace = 0; # Lines that begin with curly braces.
|
|
|
3741 |
my $end_brace = 0; # Lines that begin with curly braces.
|
|
|
3742 |
my $load_lib = 0; # Lines with the Load_lib command.
|
|
|
3743 |
my $found_proc = 0;
|
|
|
3744 |
my $found_if = 0;
|
|
|
3745 |
my $found_brackets = 0;
|
|
|
3746 |
my $found_expect = 0;
|
|
|
3747 |
my $found_pound = 0;
|
|
|
3748 |
|
|
|
3749 |
# Return cached result, if available:
|
|
|
3750 |
if ($expect_files{$filename}) { return expect_files{$filename};}
|
|
|
3751 |
|
|
|
3752 |
open(EXPECT_FILE, "<$filename") ||
|
|
|
3753 |
die "Can't open $filename to determine if it's expect.\n";
|
|
|
3754 |
while(<EXPECT_FILE>) {
|
|
|
3755 |
|
|
|
3756 |
if (m/#/) {$found_pound++; s/#.*//;}
|
|
|
3757 |
if (m/^\s*\{/) { $begin_brace++;}
|
|
|
3758 |
if (m/\{\s*$/) { $begin_brace++;}
|
|
|
3759 |
if (m/^\s*\}/) { $end_brace++;}
|
|
|
3760 |
if (m/\};?\s*$/) { $end_brace++;}
|
|
|
3761 |
if (m/^\s*load_lib\s+\S/) { $load_lib++;}
|
|
|
3762 |
if (m/^\s*proc\s/) { $found_proc++;}
|
|
|
3763 |
if (m/^\s*if\s/) { $found_if++;}
|
|
|
3764 |
if (m/\[.*\]/) { $found_brackets++;}
|
|
|
3765 |
if (m/^\s*expect\s/) { $found_expect++;}
|
|
|
3766 |
}
|
|
|
3767 |
close(EXPECT_FILE);
|
|
|
3768 |
|
|
|
3769 |
if ($load_lib && ($found_pound || ($begin_brace && $end_brace)))
|
|
|
3770 |
{$is_expect = 1;}
|
|
|
3771 |
if ( $begin_brace && $end_brace &&
|
|
|
3772 |
($found_proc || $found_if || $found_brackets || $found_expect))
|
|
|
3773 |
{$is_expect = 1;}
|
|
|
3774 |
|
|
|
3775 |
$expect_files{$filename} = $is_expect; # Store result in cache.
|
|
|
3776 |
|
|
|
3777 |
return $is_expect;
|
|
|
3778 |
} # 1}}}
|
|
|
3779 |
sub really_is_pascal { # {{{1
|
|
|
3780 |
# Given filename, returns TRUE if its contents really are Pascal.
|
|
|
3781 |
|
|
|
3782 |
# This isn't as obvious as it seems.
|
|
|
3783 |
# Many ".p" files are Perl files
|
|
|
3784 |
# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p),
|
|
|
3785 |
# others are C extractions
|
|
|
3786 |
# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p
|
|
|
3787 |
# and some files in linuxconf).
|
|
|
3788 |
# However, test files in "p2c" really are Pascal, for example.
|
|
|
3789 |
|
|
|
3790 |
# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p
|
|
|
3791 |
# is actually C code. The heuristics determine that they're not Pascal,
|
|
|
3792 |
# but because it ends in ".p" it's not counted as C code either.
|
|
|
3793 |
# I believe this is actually correct behavior, because frankly it
|
|
|
3794 |
# looks like it's automatically generated (it's a bitmap expressed as code).
|
|
|
3795 |
# Rather than guess otherwise, we don't include it in a list of
|
|
|
3796 |
# source files. Let's face it, someone who creates C files ending in ".p"
|
|
|
3797 |
# and expects them to be counted by default as C files in SLOCCount needs
|
|
|
3798 |
# their head examined. I suggest examining their head
|
|
|
3799 |
# with a sucker rod (see syslogd(8) for more on sucker rods).
|
|
|
3800 |
|
|
|
3801 |
# This heuristic counts as Pascal such files such as:
|
|
|
3802 |
# /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p
|
|
|
3803 |
# Which is hand-generated. We don't count woven documents now anyway,
|
|
|
3804 |
# so this is justifiable.
|
|
|
3805 |
|
|
|
3806 |
my $filename = shift;
|
|
|
3807 |
chomp($filename);
|
|
|
3808 |
|
|
|
3809 |
# The heuristic is as follows: it's Pascal _IF_ it has all of the following
|
|
|
3810 |
# (ignoring {...} and (*...*) comments):
|
|
|
3811 |
# 1. "^..program NAME" or "^..unit NAME",
|
|
|
3812 |
# 2. "procedure", "function", "^..interface", or "^..implementation",
|
|
|
3813 |
# 3. a "begin", and
|
|
|
3814 |
# 4. it ends with "end.",
|
|
|
3815 |
#
|
|
|
3816 |
# Or it has all of the following:
|
|
|
3817 |
# 1. "^..module NAME" and
|
|
|
3818 |
# 2. it ends with "end.".
|
|
|
3819 |
#
|
|
|
3820 |
# Or it has all of the following:
|
|
|
3821 |
# 1. "^..program NAME",
|
|
|
3822 |
# 2. a "begin", and
|
|
|
3823 |
# 3. it ends with "end.".
|
|
|
3824 |
#
|
|
|
3825 |
# The "end." requirements in particular filter out non-Pascal.
|
|
|
3826 |
#
|
|
|
3827 |
# Note (jgb): this does not detect Pascal main files in fpc, like
|
|
|
3828 |
# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in
|
|
|
3829 |
# it
|
|
|
3830 |
|
|
|
3831 |
my $is_pascal = 0; # Value to determine.
|
|
|
3832 |
|
|
|
3833 |
my $has_program = 0;
|
|
|
3834 |
my $has_unit = 0;
|
|
|
3835 |
my $has_module = 0;
|
|
|
3836 |
my $has_procedure_or_function = 0;
|
|
|
3837 |
my $found_begin = 0;
|
|
|
3838 |
my $found_terminating_end = 0;
|
|
|
3839 |
my $has_begin = 0;
|
|
|
3840 |
|
|
|
3841 |
# Return cached result, if available:
|
|
|
3842 |
if ($pascal_files{$filename}) { return pascal_files{$filename};}
|
|
|
3843 |
|
|
|
3844 |
open(PASCAL_FILE, "<$filename") ||
|
|
|
3845 |
die "Can't open $filename to determine if it's pascal.\n";
|
|
|
3846 |
while(<PASCAL_FILE>) {
|
|
|
3847 |
s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective.
|
|
|
3848 |
s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective.
|
|
|
3849 |
if (m/\bprogram\s+[A-Za-z]/i) {$has_program=1;}
|
|
|
3850 |
if (m/\bunit\s+[A-Za-z]/i) {$has_unit=1;}
|
|
|
3851 |
if (m/\bmodule\s+[A-Za-z]/i) {$has_module=1;}
|
|
|
3852 |
if (m/\bprocedure\b/i) { $has_procedure_or_function = 1; }
|
|
|
3853 |
if (m/\bfunction\b/i) { $has_procedure_or_function = 1; }
|
|
|
3854 |
if (m/^\s*interface\s+/i) { $has_procedure_or_function = 1; }
|
|
|
3855 |
if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; }
|
|
|
3856 |
if (m/\bbegin\b/i) { $has_begin = 1; }
|
|
|
3857 |
# Originally I said:
|
|
|
3858 |
# "This heuristic fails if there are multi-line comments after
|
|
|
3859 |
# "end."; I haven't seen that in real Pascal programs:"
|
|
|
3860 |
# But jgb found there are a good quantity of them in Debian, specially in
|
|
|
3861 |
# fpc (at the end of a lot of files there is a multiline comment
|
|
|
3862 |
# with the changelog for the file).
|
|
|
3863 |
# Therefore, assume Pascal if "end." appears anywhere in the file.
|
|
|
3864 |
if (m/end\.\s*$/i) {$found_terminating_end = 1;}
|
|
|
3865 |
# elsif (m/\S/) {$found_terminating_end = 0;}
|
|
|
3866 |
}
|
|
|
3867 |
close(PASCAL_FILE);
|
|
|
3868 |
|
|
|
3869 |
# Okay, we've examined the entire file looking for clues;
|
|
|
3870 |
# let's use those clues to determine if it's really Pascal:
|
|
|
3871 |
|
|
|
3872 |
if ( ( ($has_unit || $has_program) && $has_procedure_or_function &&
|
|
|
3873 |
$has_begin && $found_terminating_end ) ||
|
|
|
3874 |
( $has_module && $found_terminating_end ) ||
|
|
|
3875 |
( $has_program && $has_begin && $found_terminating_end ) )
|
|
|
3876 |
{$is_pascal = 1;}
|
|
|
3877 |
|
|
|
3878 |
$pascal_files{$filename} = $is_pascal; # Store result in cache.
|
|
|
3879 |
|
|
|
3880 |
return $is_pascal;
|
|
|
3881 |
} # 1}}}
|
|
|
3882 |
sub really_is_incpascal { # {{{1
|
|
|
3883 |
# Given filename, returns TRUE if its contents really are Pascal.
|
|
|
3884 |
# For .inc files (mainly seen in fpc)
|
|
|
3885 |
|
|
|
3886 |
my $filename = shift;
|
|
|
3887 |
chomp($filename);
|
|
|
3888 |
|
|
|
3889 |
# The heuristic is as follows: it is Pacal if any of the following:
|
|
|
3890 |
# 1. really_is_pascal returns true
|
|
|
3891 |
# 2. Any usual reserverd word is found (program, unit, const, begin...)
|
|
|
3892 |
|
|
|
3893 |
# If the general routine for Pascal files works, we have it
|
|
|
3894 |
if (&really_is_pascal ($filename)) {
|
|
|
3895 |
$pascal_files{$filename} = 1;
|
|
|
3896 |
return 1;
|
|
|
3897 |
}
|
|
|
3898 |
|
|
|
3899 |
my $is_pascal = 0; # Value to determine.
|
|
|
3900 |
my $found_begin = 0;
|
|
|
3901 |
|
|
|
3902 |
open(PASCAL_FILE, "<$filename") ||
|
|
|
3903 |
die "Can't open $filename to determine if it's pascal.\n";
|
|
|
3904 |
while(<PASCAL_FILE>) {
|
|
|
3905 |
s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective.
|
|
|
3906 |
s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective.
|
|
|
3907 |
if (m/\bprogram\s+[A-Za-z]/i) {$is_pascal=1;}
|
|
|
3908 |
if (m/\bunit\s+[A-Za-z]/i) {$is_pascal=1;}
|
|
|
3909 |
if (m/\bmodule\s+[A-Za-z]/i) {$is_pascal=1;}
|
|
|
3910 |
if (m/\bprocedure\b/i) {$is_pascal = 1; }
|
|
|
3911 |
if (m/\bfunction\b/i) {$is_pascal = 1; }
|
|
|
3912 |
if (m/^\s*interface\s+/i) {$is_pascal = 1; }
|
|
|
3913 |
if (m/^\s*implementation\s+/i) {$is_pascal = 1; }
|
|
|
3914 |
if (m/\bconstant\s+/i) {$is_pascal=1;}
|
|
|
3915 |
if (m/\bbegin\b/i) { $found_begin = 1; }
|
|
|
3916 |
if ((m/end\.\s*$/i) && ($found_begin = 1)) {$is_pascal = 1;}
|
|
|
3917 |
if ($is_pascal) {
|
|
|
3918 |
last;
|
|
|
3919 |
}
|
|
|
3920 |
}
|
|
|
3921 |
|
|
|
3922 |
close(PASCAL_FILE);
|
|
|
3923 |
$pascal_files{$filename} = $is_pascal; # Store result in cache.
|
|
|
3924 |
return $is_pascal;
|
|
|
3925 |
} # 1}}}
|
|
|
3926 |
sub really_is_php { # {{{1
|
|
|
3927 |
# Given filename, returns TRUE if its contents really is php.
|
|
|
3928 |
|
|
|
3929 |
my $filename = shift;
|
|
|
3930 |
chomp($filename);
|
|
|
3931 |
|
|
|
3932 |
my $is_php = 0; # Value to determine.
|
|
|
3933 |
# Need to find a matching pair of surrounds, with ending after beginning:
|
|
|
3934 |
my $normal_surround = 0; # <?; bit 0 = <?, bit 1 = ?>
|
|
|
3935 |
my $script_surround = 0; # <script..>; bit 0 = <script language="php">
|
|
|
3936 |
my $asp_surround = 0; # <%; bit 0 = <%, bit 1 = %>
|
|
|
3937 |
|
|
|
3938 |
# Return cached result, if available:
|
|
|
3939 |
if ($php_files{$filename}) { return $php_files{$filename};}
|
|
|
3940 |
|
|
|
3941 |
open(PHP_FILE, "<$filename") ||
|
|
|
3942 |
die "Can't open $filename to determine if it's php.\n";
|
|
|
3943 |
while(<PHP_FILE>) {
|
|
|
3944 |
if (m/\<\?/) { $normal_surround |= 1; }
|
|
|
3945 |
if (m/\?\>/ && ($normal_surround & 1)) { $normal_surround |= 2; }
|
|
|
3946 |
if (m/\<script.*language="?php"?/i) { $script_surround |= 1; }
|
|
|
3947 |
if (m/\<\/script\>/i && ($script_surround & 1)) { $script_surround |= 2; }
|
|
|
3948 |
if (m/\<\%/) { $asp_surround |= 1; }
|
|
|
3949 |
if (m/\%\>/ && ($asp_surround & 1)) { $asp_surround |= 2; }
|
|
|
3950 |
}
|
|
|
3951 |
close(PHP_FILE);
|
|
|
3952 |
|
|
|
3953 |
if ( ($normal_surround == 3) || ($script_surround == 3) ||
|
|
|
3954 |
($asp_surround == 3)) {
|
|
|
3955 |
$is_php = 1;
|
|
|
3956 |
}
|
|
|
3957 |
|
|
|
3958 |
$php_files{$filename} = $is_php; # Store result in cache.
|
|
|
3959 |
|
|
|
3960 |
return $is_php;
|
|
|
3961 |
} # 1}}}
|
|
|
3962 |
__END__
|
|
|
3963 |
mode values (stat $item)[2]
|
|
|
3964 |
Unix Windows
|
|
|
3965 |
file: 33188 33206
|
|
|
3966 |
dir : 16832 16895
|
|
|
3967 |
link: 33261 33206
|
|
|
3968 |
pipe: 4544 null
|