| 1293 |
dpurdie |
1 |
###############################################################################
|
|
|
2 |
# Codestriker: Copyright (c) 2001, 2002, 2003 David Sitsky.
|
|
|
3 |
# All rights reserved.
|
|
|
4 |
# sits@users.sourceforge.net
|
|
|
5 |
#
|
|
|
6 |
# This program is free software; you can redistribute it and modify it under
|
|
|
7 |
# the terms of the GPL.
|
|
|
8 |
|
|
|
9 |
# Main delegate parser object, which tries a bunch of parsers to determine what
|
|
|
10 |
# format the input review is in. The last resort is to view it as
|
|
|
11 |
# unstructured text, if it is content-type text/plain, otherwise indicate
|
|
|
12 |
# an error.
|
|
|
13 |
|
|
|
14 |
package Codestriker::FileParser::Parser;
|
|
|
15 |
|
|
|
16 |
use strict;
|
|
|
17 |
|
|
|
18 |
use FileHandle;
|
|
|
19 |
use File::Temp qw/ tempfile /;
|
|
|
20 |
|
|
|
21 |
use Codestriker::FileParser::CvsUnidiff;
|
|
|
22 |
use Codestriker::FileParser::SubversionDiff;
|
|
|
23 |
use Codestriker::FileParser::PerforceDescribe;
|
|
|
24 |
use Codestriker::FileParser::PerforceDiff;
|
|
|
25 |
use Codestriker::FileParser::VssDiff;
|
|
|
26 |
use Codestriker::FileParser::PatchUnidiff;
|
|
|
27 |
use Codestriker::FileParser::PatchBasicDiff;
|
|
|
28 |
use Codestriker::FileParser::ClearCaseSerialDiff;
|
|
|
29 |
use Codestriker::FileParser::UnknownFormat;
|
|
|
30 |
|
|
|
31 |
# Given the content-type and the file handle, try to determine what files,
|
|
|
32 |
# lines, revisions and diffs have been submitted in this review.
|
|
|
33 |
sub parse ($$$$$$) {
    # Arguments:
    #   $type              - invocant (class name); not used in the body.
    #   $fh                - handle of the submitted review text.  It is read
    #                        to EOF and rewound to offset 0 before returning.
    #   $content_type      - MIME type of the submission.  Only "text/plain"
    #                        is parsed; the gzip/zip branches are placeholders
    #                        and yield no diffs.
    #   $repository        - passed through to the repository-aware parsers.
    #   $topicid           - not used in the body.
    #   $uploaded_filename - passed to the PatchBasicDiff parser, and used
    #                        (defaulting to 'unknown.txt') as the filename for
    #                        the UnknownFormat fallback.
    #
    # Returns the list of diff chunks found, sorted by filename and then by
    # old line number, with chunks whose file extension appears in
    # @Codestriker::exclude_file_types removed.
    #
    # Dies if the temporary file cannot be created or if either handle
    # cannot be rewound.
    my ($type, $fh, $content_type, $repository, $topicid,
        $uploaded_filename) = @_;

    # Diffs found.
    my @diffs = ();

    # Diffs produced on Windows use \r\n line endings.  Rather than making
    # every parser cope with that, copy the input into a temporary file
    # with the endings normalised and hand that handle to the parsers.
    # tempfile() is called in scalar context, so only the handle is
    # returned and File::Temp takes care of removing the file.
    my $tmpfh;
    if (defined $Codestriker::tmpdir && $Codestriker::tmpdir ne "") {
        $tmpfh = tempfile(DIR => $Codestriker::tmpdir);
    }
    else {
        $tmpfh = tempfile();
    }

    # Validate the handle before it is used for anything else.
    if (!$tmpfh) {
        die "Unable to create temporary parse file: $!";
    }
    binmode $tmpfh, ':utf8';

    # Read the raw bytes of the input.  A lexical is used for the current
    # line so the caller's $_ is left untouched.
    binmode $fh;
    my $first_line = 1;
    while (my $raw = <$fh>) {
        if ($first_line) {
            # Remove the UTF8 BOM if it exists.
            $raw =~ s/^\xEF\xBB\xBF//;
            $first_line = 0;
        }
        my $line = Codestriker::decode_topic_text($raw);
        $line =~ s/\r\n/\n/g;
        print $tmpfh $line;
    }

    # Rewind the file, then let the parsers have at it.
    seek($tmpfh, 0, 0) ||
        die "Unable to seek to the start of the temporary file: $!";

    # If the file is text/plain, try all of the text parsers.
    if ($content_type eq "text/plain") {

        # Parsers are tried in this order and the first one that returns
        # any diffs wins.  Each entry is [ class, second argument ]; note
        # that PatchBasicDiff takes the uploaded filename, not the
        # repository.
        my @candidates = (
            [ 'Codestriker::FileParser::CvsUnidiff',          $repository ],
            [ 'Codestriker::FileParser::SubversionDiff',      $repository ],
            [ 'Codestriker::FileParser::PerforceDescribe',    $repository ],
            [ 'Codestriker::FileParser::PerforceDiff',        $repository ],
            [ 'Codestriker::FileParser::VssDiff',             $repository ],
            [ 'Codestriker::FileParser::PatchUnidiff',        $repository ],
            [ 'Codestriker::FileParser::PatchBasicDiff',      $uploaded_filename ],
            [ 'Codestriker::FileParser::ClearCaseSerialDiff', $repository ],
        );

        foreach my $candidate (@candidates) {
            last if @diffs;

            my ($parser_class, $parser_arg) = @$candidate;

            # Every parser must start reading from the top of the file.
            seek($tmpfh, 0, 0) ||
                die "Unable to seek to the start of the temporary file: $!";
            @diffs = $parser_class->parse($tmpfh, $parser_arg);
        }

        # Last stop-gap - the file format is unknown, treat it as a
        # single file named after the upload (or "unknown.txt").
        if (!@diffs) {
            if (! defined $uploaded_filename || $uploaded_filename eq '') {
                $uploaded_filename = 'unknown.txt';
            }
            seek($tmpfh, 0, 0) ||
                die "Unable to seek to the start of the temporary file: $!";
            @diffs = Codestriker::FileParser::UnknownFormat->
                parse($tmpfh, $uploaded_filename);
        }
    }
    elsif ($content_type eq "application/gzip" ||
           $content_type eq "application/x-gzip") {
        # Check if it is a gzip file.  (Not implemented.)
    }
    elsif ($content_type eq "application/zip" ||
           $content_type eq "application/x-zip") {
        # Check if it is a zip file.  (Not implemented.)
    }

    # Restore the offset of the caller's handle back to the start of the
    # input again.
    seek($fh, 0, 0) ||
        die "Unable to seek to the start of the temporary file. $!";

    # Sort the diff chunks by filename, then old linenumber.
    @diffs = sort { $a->{filename} cmp $b->{filename} ||
                    $a->{old_linenumber} <=> $b->{old_linenumber} } @diffs;

    # Only include those files whose extension is not in
    # @Codestriker::exclude_file_types.  An unset or empty list excludes
    # nothing.  (defined(@array) is not used: it is a fatal error on
    # Perl 5.22 and later.)
    return @diffs unless @Codestriker::exclude_file_types;

    my %is_excluded = map { $_ => 1 } @Codestriker::exclude_file_types;

    # The extension capture is non-greedy so that a trailing RCS ",v"
    # suffix is consumed by the optional group rather than being treated
    # as part of the extension ("foo.c,v" has extension "c").  Files with
    # no extension are always kept.
    my @trimmed_diffs = grep {
        !($_->{filename} =~ /\.([^\.]+?)(,v)?$/ && $is_excluded{$1})
    } @diffs;

    # Return the diffs found, if any.
    return @trimmed_diffs;
}
|
|
|
198 |
|
|
|
199 |
1;
|
|
|
200 |
|