#!/bin/perl
# filename_changer.pl
# Meryll Larkin
# 20200615
# Replace illegal characters in Filenames AND DIRECTORY NAMES with safe characters
# in preparation for tar backups
# 20200625 Added report file
# 20200625 Added processing for Directory names as well as filenames
#          Added exclamation point to illegal characters
#          Added testing for path and file name length
#
# Usage: filename_changer.pl [DIRECTORY]
#   DIRECTORY is walked recursively; when it is omitted the script prompts
#   for it.  Entries whose names start with "." or "_" are skipped.  Every
#   rename is written to out_<script>_<yyyymmdd>_<pid>.log in the current
#   directory, and problems are collected and reported at the end of the run.
#
# NOTE(review): the copy of this script that was reviewed had been damaged by
# tag stripping.  The tail of fix_filename() and all of error_handler() were
# missing and have been reconstructed from their call sites; the path / file
# name length test mentioned in the changelog above was not visible and has
# NOT been reconstructed.  Compare against a pristine copy before relying on
# this version.

use strict;
use warnings;
use File::Basename;
use POSIX qw(strftime);

# The prompt below has no trailing newline, so make sure it is flushed
# before we block on STDIN.
$| = 1;

my $datestring = strftime('%Y%m%d', localtime);
my $pid        = $$;

my $scriptname = basename($0);
$scriptname =~ s/\.pl$//;

my $REPORT = "out_${scriptname}_${datestring}_${pid}.log";
open(my $LOG, '>>', $REPORT)
    or die "Unable to open log file $REPORT " . $! . "\n";
print {$LOG} "$datestring $scriptname OUTPUT\n\n";

my @MORE_DIRECTORIES;    # directories discovered but not yet processed (LIFO)
my @ERRORS;              # messages collected by error_handler()

# option: permit user to provide starting directory as command line argument
my $dir = $ARGV[0];
if (!defined $dir || $dir eq '') {
    print "Please provide the FULL ABSOLUTE PATH of the directory you want recursively processed: ";
    $dir = <STDIN>;
    die "No directory provided\n" unless defined $dir;
    chomp $dir;
}
print "dir = $dir\n";

# remove final / on directory name if that is how it was provided
$dir =~ s/\/$//;

die "$dir is not a directory\n" unless -d $dir;

while (1) {
    my $DIR_BAD_FILE_COUNT = 0;
    print {$LOG} "\n\n$dir\n";

    # Fix the directory's own name first so that every path built from it
    # below already uses the new name.
    my $good_dir_name = fix_filename($dir);
    if ($good_dir_name ne $dir) {
        if (-e $good_dir_name) {
            error_handler("$dir not changed $good_dir_name already exists");
        } elsif (rename($dir, $good_dir_name)) {
            $DIR_BAD_FILE_COUNT++;
            print {$LOG} "$DIR_BAD_FILE_COUNT $dir $good_dir_name\n";
            $dir = $good_dir_name;
        } else {
            error_handler("$dir not changed rename to $good_dir_name failed: $!");
        }
    }

    # Read the whole listing and close the handle straight away: one handle
    # per directory, never left open while we recurse.
    opendir(my $DIR, $dir) or die "Unable to open $dir: " . $!;
    my @dirContents = grep { !/^[_\.]/ } readdir($DIR);
    closedir $DIR;

    foreach my $entry (@dirContents) {
        my $filename = $dir . '/' . $entry;
        print "filename = $filename\n";

        if (-d $filename) {
            if (-l $filename) {
                # -d follows symlinks; descending through one can loop
                # forever on a link that points back up the tree.
                print "$filename is a symlink to a directory, not followed\n";
                next;
            }
            print "$filename is a directory\n";
            push(@MORE_DIRECTORIES, $filename);
            next;
        }

        next unless -f $filename;

        my $good_name = fix_filename($filename);

        # we don't need to do anything, filename is already good
        next if $good_name eq $filename;

        ##### IMPORTANT ######
        # Before renaming a file, make sure another file does not already
        # exist with that name
        if (-e $good_name) {
            error_handler("$filename not changed $good_name already exists");
        } elsif (rename($filename, $good_name)) {
            $DIR_BAD_FILE_COUNT++;
            print {$LOG} "$DIR_BAD_FILE_COUNT $filename $good_name\n";
        } else {
            error_handler("$filename not changed rename to $good_name failed: $!");
        }
    }

    # Report on the directory just processed, before $dir moves on.
    print {$LOG} "Total files changed for $dir = $DIR_BAD_FILE_COUNT\n";

    my $scalar = scalar @MORE_DIRECTORIES;
    print "scalar @MORE_DIRECTORIES = $scalar\n";
    last if $scalar < 1;

    $dir = pop @MORE_DIRECTORIES;
    print "pop = $dir\n";
}

if (@ERRORS) {
    print "\n\nSome errors were encountered:\n";
    print {$LOG} "\n\nSome errors were encountered:\n";
    foreach my $error (@ERRORS) {
        print " $error\n";
        print {$LOG} "$error\n";
    }
}

close($LOG) or warn "Unable to close log file $REPORT " . $! . "\n";

exit 0;

# =====================================

# fix_filename(PATH)
#
# Returns PATH with its last component rewritten to use only characters that
# are safe for tar; the directory part of PATH is never touched.  When the
# last component needs no change, PATH is returned exactly as given, so
# callers can detect "nothing to do" with a plain string comparison.
# Nothing is renamed here - the caller decides what to do with the result.
sub fix_filename {
    my $full_path_filename = shift;

    my $original_name = basename($full_path_filename);
    my $dir           = dirname($full_path_filename);
    my $filename_only = $original_name;

    # change all spaces into underscores
    $filename_only =~ s/\s/_/g;

    # if filename begins with non-alpha numeric character, remove it.
    $filename_only =~ s/^[^a-zA-Z\d]+//;

    # special case, change [_] to underscore (must run before the brackets
    # are turned into parentheses below)
    $filename_only =~ s/\[_\]/_/g;

    # asterisk -> plus sign, square brackets -> parentheses
    $filename_only =~ tr/*[]/+()/;

    # comma, semicolon, back tick, backslash, octothorpe, dollar sign,
    # percent sign, pipe, right arrow and left arrow -> underscore
    # NOTE(review): the left-arrow substitution was cut off in the reviewed
    # copy; it is restored from its surviving comment.
    $filename_only =~ tr/,;`\\#$%|></_/;

    # single quote, double quote, exclamation point and question mark are
    # removed outright
    $filename_only =~ tr/'"!?//d;

    return $full_path_filename if $filename_only eq $original_name;

    if ($filename_only eq '') {
        # Nothing safe is left (e.g. a name made only of punctuation);
        # renaming to an empty name would target the parent directory.
        error_handler("$full_path_filename not changed no safe characters left in name");
        return $full_path_filename;
    }

    # NOTE(review): reconstructed - callers compare the result with the full
    # path and hand it to rename(), so the directory part is put back here.
    return $dir eq '/' ? "/$filename_only" : "$dir/$filename_only";
}

# error_handler(MESSAGE)
#
# Remembers MESSAGE so that it is printed to the screen and the log in the
# summary at the end of the run.
# NOTE(review): reconstructed from its call sites and from the final loop
# over @ERRORS; the original body was not visible.
sub error_handler {
    my ($message) = @_;
    push @ERRORS, $message;
    return;
}

# =====================================
# Search for tar breakers:
# NOTE(review): the introduction to this first command was lost in the
# reviewed copy; the leading "find /" is restored by analogy with the second
# command below.
#
# find / \( -name '* *' -o -name '* .*' -o -name '* -*' -o -name '-*'
#   -o -name '*\**' -o -name '*;*' -o -name '*]*' -o -name '*[*' -o -name '*`*'
#   -o -name '*"*' -o -name "*'*" -o -name "*\!*" -o -name "*\\*" \) 2>/dev/null
#
# Search for maybe tar breakers: ? # $ % | <>^
#
# find / \( -name '*\?*' -o -name '*\#*' -o -name '*;*' -o -name '*\$*'
#   -o -name '*\%*' -o -name '*\|*' -o -name '*\<*' -o -name '*>*' -o -name
#   '*^*' \) 2> /dev/null
#
# Here are the characters that I am sure will not break tar nor anything else
# on Linux:
# Alphanumeric, hyphen, underscores, periods (hyphen is same as minus sign -
# but don't use it as first character in filename)
# AZ az 1234567890-_.
#
# Here are more characters that I am pretty sure are safe.  Just use them in
# the middle of the file name or at the end of the file name.
# I came to this conclusion because I found multiple files in /usr or /bin or
# /lib or /etc with these characters in the filename
# Parentheses, equal sign, plus sign, tilde, caret, ampersand, curly bracket,
# colon, percent
# ()=+~^&{},@:%
#