# MacroPurge v0.1 by W. Black.
#
# Runs under Perl in Win32 to clean and Document_* macros in
# Word '97 Documents.  It may work in other versions of Word,
# but I haven't tested it.  It requires both a Win32 version
# of Perl be installed (like the one at 
# http://www.activestate.com) and Microsoft Word.  Bug reports,
# feature requests, and hate mail to wjblack@yahoo.com.
#
# This module is Copyright © 2000 William Black. All rights 
# reserved. This script is free software; you can redistribute 
# it and/or modify it under the same terms as Perl itself. 
# This script is distributed in the hope that it will be useful, 
# but WITHOUT ANY WARRANTY; without even the implied warranty 
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. The 
# copyright holder of this script can not be held liable for 
# any general, special, incidental or consequential damages 
# arising out of the use of the script.
#
# Microsoft is probably a trademark of Microsoft Corporation.
# I sincerely hope "word" isn't, but "Microsoft Word" probably
# is.  My humblest apologies to their attorneys if I have
# offended them in any way.  :-)
#
# There's even POD dox at the bottom of this thing if anyone's
# interested.  ;-)


# You should already have this if you've installed the
# ActiveState distro.
use Win32::OLE;

# Scan and descend everything under the listed directory.
sub scanDir {
  my($thePath) = shift(@_);

  # This function is recursive, so we need to specify this...
  local *SCANDIR;

  # This bit needs a little rework to be more tolerant
  opendir SCANDIR, $thePath || die "Couldn't open the directory!\n";

  # Iterate through all files in the directory
  while( $theFile = readdir(SCANDIR) )
  {
    $fullFile = $thePath . "\\" . $theFile;
    @fileStat = stat($fullFile);

    # If this is a non-special directory, then descend it.
    if(!($fileStat[2] & 0x8000) && 
       !($theFile eq "." || $theFile eq '..')) {
      print "Scanning " . $fullFile . "...\n";
      scanDir($fullFile);
    }

    # Otherwise, check for a regular file and a .DOC on the end.
    if(($fileStat[2] & 0x8000) && 
       (lc(substr($fullFile,length($fullfile)-4,4)) eq ".doc")) {
      cleanFile($fullFile);
    }
  }
}

# Presumably the file is a Word DOC, so scan and clean as
# appropriate.
sub cleanFile {
  my($fullFile) = shift(@_);

  # Open the file for read.  Rework to skip the unable to open cases 
  # (or log them).
  open SCANFILE,$fullFile || die "Couldn't open the file!\n";

  # For Win32
  binmode SCANFILE;

  # Show that we found/are working on this file (probably should be 
  # reworked to allow for a variety of output/logging)
  print $fullFile . "\n";
  $i = 0;
  while (<SCANFILE>)
  {
    # Fortunately, we can check for the yucky cases binarily...
    if( !$i && (/Document_Open/ || /Document_Close/ || /Document_New/) ) {
      $i = 1;
    }
  }
  # If we've found any baddies, kill 'em!
  if ( $i )
  {
    # Rewind the infile...
    seek SCANFILE, 0, SEEK_CUR;

    # Probably redundant
    binmode SCANFILE;

    # The output file
    open FRESHFILE,">" . $fullFile . ".clean";

    # for Win32
    binmode FRESHFILE;

    # Slurp in data line by line, disable the baddies.  We need to
    # do this because macro protection is nonfunctional on files
    # that are opened programmatically (and in case Word isn't 
    # installed, something is disabled...
    while (<SCANFILE>) {
      s/Document_Open/Disabled_Open/g;

      # I was originally going to just kill _Open, but I found that
      # Marker.C (and probably others) error the VB environment on
      # document close if the _Open module isn't there.  It is (I
      # guess) possible to write a virus that only kicks in on
      # _Close or _New...
      s/Document_Close/Disabled_Close/g;
      s/Document_New/Disabled_New/g;
      print FRESHFILE $_;
    }
    close SCANFILE;
    close FRESHFILE;
    # Play file rename games.  I did this in case something dies and
    # manual recovery is needed...
    rename $fullFile, $fullFile . ".dirty";
    rename $fullFile . ".clean", $fullFile;

    # OK, here's where it gets REALLY funky...
    # Fire up word and do the automated copy-paste thing :-O
    #
    # use existing instance if Word is already running
    unless (defined $wrd) {
      eval {$wrd = Win32::OLE->GetActiveObject('Word.Application')};
      die "Word not installed" if $@;
      unless (defined $wrd) {
        $wrd = Win32::OLE->new('Word.Application', sub {$_[0]->Quit;})
               or die "Can't Start Word!";
      }
    }
    # Let our listerers know what's up (it'll say done and \n 
    # later)...
    print "Disinfecting " . $fullFile . "...";

    # Open the old doc (disabled macros and all).  If I hadn't 
    # binary edited the macros into submission, they'd run right
    # now.
    my($oldDoc) = $wrd->Documents->Open( $fullFile ) or die "Aaarrgh!";

    # Create a fresh destination doc (this only works if Normal.dot
    # isn't corrupt, but you knew that, right?)
    my($newDoc) = $wrd->Documents->Add;

    # Copy...
    $oldDoc->Content->Copy;
    # Paste...
    $newDoc->Content->Paste;
    # Save...
    $newDoc->SaveAs( $fullFile . '.clean' );
    # Close...
    $oldDoc->Close;
    $newDoc->Close;
    # Kill the intermediary file...
    unlink $fullFile;
    # Rename the new one...
    rename $fullFile . ".clean", $fullFile;
    # We're done.  Kill the objects (I may want to rethink this
    # later, as this kills the Word session, too, I think...)
    undef $oldDoc;
    undef $newDoc;
    print "Done!\n";
  }
}

print "MacroPurge v0.1 by W. Black\n";
scanDir $ARGV[0] if($ARGV[0]);

#--------------------------------------------------------------

=head1 NAME

macropurge - Scan a directory for Microsoft Word '97 documents 
with 'Document_*' macros and remove the macros.

=head1 SYNOPSIS

macropurge path

=head1 DESCRIPTION

Runs under Perl in Win32 to clean and Document_* macros in Word
'97 Documents.  It may work in other versions of Word, but I 
haven't tested it.  It requires both a Win32 version of Perl be 
installed (like the one at http://www.activestate.com) and 
Microsoft Word.

=head1 README

MacroPurge v0.1 by W. Black.

Runs under Perl in Win32 to clean and Document_* macros in
Word '97 Documents.  It may work in other versions of Word,
but I haven't tested it.  It requires both a Win32 version
of Perl be installed (like the one at 
http://www.activestate.com) and Microsoft Word.  Bug reports,
feature requests, and hate mail to wjblack@yahoo.com.  I
will make every effort to support this thing as much as I
can, but there are no guarantees (I'm a UNIX & NT SysAdmin
and a SQL database programmer->no free time :-().

This module is Copyright © 2000 William Black. All rights 
reserved. This script is free software; you can redistribute 
it and/or modify it under the same terms as Perl itself. 
This script is distributed in the hope that it will be useful, 
but WITHOUT ANY WARRANTY; without even the implied warranty 
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. The 
copyright holder of this script can not be held liable for 
any general, special, incidental or consequential damages 
arising out of the use of the script.

Microsoft is probably a trademark of Microsoft Corporation.
I sincerely hope "word" isn't, but "Microsoft Word" probably
is.  My humblest apologies to their attorneys if I have
offended them in any way.  :-)

How to use this silly thing:

0.  Make sure ActivePerl and Word are installed on whatever 
    machine's doing the virus scanning and disable any other
    virus scanner that might prevent this one from getting 
    access to the infected files.
1.  If necessary, map a drive to whatever network share's 
    being scanned.
2.  Run the following:  macropurge.pl <fullPath>
    Example:            macropurge.pl Q:\Infected
3.  Make sure to get rid of .dirty files when you're done 
    (this'll probably be a command-line parameter in the 
    future).
4.  It's probably a good idea to replace everyone's 
    Normal.dot, as this thing can't look at that (yet).

Advantages of using this scanner:

0.  It's free (as in freedom) and free (as in beer :-).
1.  It totally kills any and all traces of macro viruses
    if used properly.  Bit-pattern virus scanners may just
    disable the virus, leaving other scanners able to
    detect a (now defunct) virus and making you look bad.
2.  It kills all known and unknown viruses with extreme
    prejudice.

Advantages of using a commercial antivirus package:

1.  They kill more kinds of viruses.
2.  They don't (usually) kill legitimate Document_* macros
    (though I'm not sure such a critter actually exists).
3.  They don't (usually) require that MS Word's installed.

How does this thing work?/Why does Word need to be 
installed?

This script looks for any .doc files.  It then does a
binary grep for 'Document_*' (which will never happen in
a Word Document that doesn't have at least a macro 
fragment--as actual content is always two-byte unicode).
It then binary edits 'Document_*' to be 'Disabled_*'.  This 
is enough to turn off the autorun macro, but doesn't kill 
the virus signature totally (as the code is still there 
and can be found by a bit-pattern scanner).  As a result, 
the script fires up a Word OLE object to open the document, 
copy the content, and paste it into a fresh doc, then saving 
it and renaming the old document to foo.doc.dirty.  This
whole process is only slow if there are a bunch of 
disinfections to be done, as the greps themselves are pretty 
quick...

TODO

I'd really like to remove the requirement for having Word.
The structured storage module looks like a good candidate
for doing this, but it's currently read-only.  If it happens
though, you'll be able to kill macro viruses on your Linux 
box or whatever...

I'd also like to modularize some of this instead of having
one monolithic script.  I'm still pretty new to Perl
(believe it or not, I'm a recent awk/sed convert), so I 
haven't figured out the whole .pm thing yet...

That's the end of this rambling readme.  Shoot me an email
at wjblack@yahoo.com if you love, hate, or generally want
to comment.

=head1 PREREQUISITES

This script requires Win32::OLE and a Win32 distro of Perl
(like the one at http://www.activestate.com) and Microsoft Word
'97 (or ?) to do anything approaching usefulness.

=pod OSNAMES
MSWin32

=pod SCRIPT CATEGORIES

Win32/Utilities

=cut
