############################################################################### # Codestriker: Copyright (c) 2001, 2002, 2003 David Sitsky. # All rights reserved. # sits@users.sourceforge.net # # This program is free software; you can redistribute it and modify it under # the terms of the GPL. # Main delegate parser object, which tries a bunch of parsers to determine what # format the input review is in. The last resort, is to view it as # unstructured text, if it is content-type plain/text, otherwise indicate # an error. package Codestriker::FileParser::Parser; use strict; use FileHandle; use File::Temp qw/ tempfile /; use Codestriker::FileParser::CvsUnidiff; use Codestriker::FileParser::SubversionDiff; use Codestriker::FileParser::PerforceDescribe; use Codestriker::FileParser::PerforceDiff; use Codestriker::FileParser::VssDiff; use Codestriker::FileParser::PatchUnidiff; use Codestriker::FileParser::PatchBasicDiff; use Codestriker::FileParser::ClearCaseSerialDiff; use Codestriker::FileParser::UnknownFormat; # Given the content-type and the file handle, try to determine what files, # lines, revisions and diffs have been submitted in this review. sub parse ($$$$$$) { my ($type, $fh, $content_type, $repository, $topicid, $uploaded_filename) = @_; # Diffs found. my @diffs = (); # This is a pain, but to handle diffs produced on a windoze box, which # uses \r\n endings, rather than making each parser object take this # into account, create a temporary file here which removes them, and # that file handle is passed on to the parser objects, so they aren't # the wiser. Note the temporary file is automatically deleted # once this function has finished. my $tmpfh; if (defined $Codestriker::tmpdir && $Codestriker::tmpdir ne "") { $tmpfh = tempfile(DIR => $Codestriker::tmpdir); } else { $tmpfh = tempfile(); } binmode $tmpfh, ':utf8'; if (!$tmpfh) { die "Unable to create temporary parse file: $!"; } binmode $fh; my $first_line = 1; while (<$fh>) { if ($first_line) { # Remove the UTF8 BOM if it exists. s/^\xEF\xBB\xBF//o; $first_line = 0; } my $line = Codestriker::decode_topic_text($_); $line =~ s/\r\n/\n/go; print $tmpfh $line; } # Rewind the file, then let the parsers have at it. seek($tmpfh,0,0) || die "Unable to seek to the start of the temporary file: $!"; # If the file is plain/text, try all of the text parsers. if ($content_type eq "text/plain") { # Check if it is a CVS unidiff file. if ($#diffs == -1) { seek($tmpfh, 0, 0) || die "Unable to seek to the start of the temporary file: $!"; @diffs = Codestriker::FileParser::CvsUnidiff->parse($tmpfh, $repository); } # Check if it is a Subversion diff file. if ($#diffs == -1) { seek($tmpfh, 0, 0) || die "Unable to seek to the start of the temporary file: $!"; @diffs = Codestriker::FileParser::SubversionDiff->parse($tmpfh, $repository); } # Check if it is a Perforce describe file. if ($#diffs == -1) { seek($tmpfh, 0, 0) || die "Unable to seek to the start of the temporary file: $!"; @diffs = Codestriker::FileParser::PerforceDescribe->parse($tmpfh, $repository); } # Check if it is a Perforce diff file. if ($#diffs == -1) { seek($tmpfh, 0, 0) || die "Unable to seek to the start of the temporary file: $!"; @diffs = Codestriker::FileParser::PerforceDiff->parse($tmpfh, $repository); } # Check if it is a VSS diff file. if ($#diffs == -1) { seek($tmpfh, 0, 0) || die "Unable to seek to the start of the temporary file: $!"; @diffs = Codestriker::FileParser::VssDiff->parse($tmpfh, $repository); } # Check if it is a patch unidiff file. if ($#diffs == -1) { seek($tmpfh, 0, 0) || die "Unable to seek to the start of the temporary file: $!"; @diffs = Codestriker::FileParser::PatchUnidiff->parse($tmpfh, $repository); } # Check if it is a patch basic file. if ($#diffs == -1) { seek($tmpfh, 0, 0) || die "Unable to seek to the start of the temporary file: $!"; @diffs = Codestriker::FileParser::PatchBasicDiff->parse($tmpfh, $uploaded_filename); } # Check if it is a ClearCase serial diff file. if ($#diffs == -1) { seek($tmpfh, 0, 0) || die "Unable to seek to the start of the temporary file: $!"; @diffs = Codestriker::FileParser::ClearCaseSerialDiff->parse($tmpfh, $repository); } # Last stop-gap - the file format is unknown, treat it as a # single file with filename "unknown". if ($#diffs == -1) { if (! defined $uploaded_filename || $uploaded_filename eq '') { $uploaded_filename = 'unknown.txt'; } seek($tmpfh, 0, 0) || die "Unable to seek to the start of the temporary file: $!"; @diffs = Codestriker::FileParser::UnknownFormat-> parse($tmpfh, $uploaded_filename); } } elsif ($content_type eq "application/gzip" || $content_type eq "application/x-gzip") { # Check if it is a gzip file. } elsif ($content_type eq "application/zip" || $content_type eq "application/x-zip") { # Check if it is a zip file. } # Restore the offset back to the start of the file again. seek($fh, 0, 0) || die "Unable to seek to the start of the temporary file. $!"; # Sort the diff chunks by filename, then old linenumber. @diffs = sort { $a->{filename} cmp $b->{filename} || $a->{old_linenumber} <=> $b->{old_linenumber} } @diffs; # Only include those files whose extension is not in # @Codestriker::exclude_file_types, provided it is defined. return @diffs unless defined @Codestriker::exclude_file_types; my @trimmed_diffs = (); foreach my $curr (@diffs) { if ($curr->{filename} =~ /\.([^\.]+)(,v)?$/o) { my $ext = $1; push @trimmed_diffs, $curr unless grep { $_ eq $ext } @Codestriker::exclude_file_types; } else { # No extension on this file, add the diff in. push @trimmed_diffs, $curr; } } # Return the diffs found, if any. return @trimmed_diffs; } 1;