#!/usr/bin/perl
#
# Copy a corpus file line by line, deleting every "<something>/" prefix
# (the text in front of a slash, together with the slash) and trimming
# leading and trailing whitespace from each line.
#
# Usage: script <orgcorpus> <newcorpus>
#   orgcorpus - path of the input file to read
#   newcorpus - path of the output file (created or truncated)
use strict;
use warnings;

# Run as a script only when executed directly; when the file is loaded
# via require/do, only the subroutines are defined.
main(@ARGV) unless caller;

# Return a copy of $line with the slash-terminated prefixes removed and
# surrounding whitespace trimmed.  $line is expected to be already chomped.
# The substitutions run in this exact order because earlier ones change
# what the later ones can match.
sub strip_slash_prefixes {
    my ($line) = @_;

    $line =~ s{\w+/}{}g;         # run of word characters followed by "/"
    $line =~ s{\w+'\w+/}{}g;     # word containing an apostrophe, then "/"
    $line =~ s{\'\'/}{}g;        # two single quotes, then "/"
    $line =~ s{\``/}{}g;         # two backticks, then "/"
    # NOTE(review): the dot is unescaped, so this removes ANY remaining
    # single character followed by "/" - confirm that is intended and not
    # meant to be a literal "." only.
    $line =~ s{./}{}g;
    $line =~ s/^\s+//;           # trim leading whitespace
    $line =~ s/\s+$//;           # trim trailing whitespace

    return $line;
}

# Read the file named by the first argument, transform each line with
# strip_slash_prefixes(), and write the results to the file named by the
# second argument.  Dies if arguments are missing or a file cannot be
# opened or closed.  Returns 0 on success.
sub main {
    my @args = @_;
    die "Usage: $0 <orgcorpus> <newcorpus>\n" if @args < 2;
    my ( $orgcorpus, $newcorpus ) = @args;

    # Three-argument open: the file name is never interpreted as a mode.
    open my $org_fh, '<', $orgcorpus
        or die "Can not open the org corpus!\n";
    open my $new_fh, '>', $newcorpus
        or die "Can not open the new corpus file\n";

    # The read from the input handle is what drives (and ends) the loop.
    while ( my $line = <$org_fh> ) {
        chomp $line;
        print {$new_fh} strip_slash_prefixes($line), "\n";
    }

    close $org_fh;
    # Buffered write errors only surface at close time, so check it.
    close $new_fh
        or die "Can not close the new corpus file: $!\n";

    return 0;
}

1;