#!/usr/local/bin/perl -w @REM=(" @perl -S %0.bat %1 %2 %3 %4 %5 %6 %7 %8 %9 @goto end ") if 0 ; # # Fix and convert tape archives # # (C) Copyright 1990,91 Diomidis Spinellis. All rights reserved. # # Permission to use, copy, and distribute this software and its # documentation for any purpose and without fee is hereby granted, # provided that the above copyright notice appear in all copies and that # both that copyright notice and this permission notice appear in # supporting documentation. # # THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. # # dds@doc.ic.ac.uk # # This program is not efficient. # A have tried instead to make it readable, flexible and easy to use. # For this I use lots of subroutines, local variables and higher order # functions. If you want something efficient rewrite it in C. # # You can very easily add a new conversion mode. Just add the conversion # function. If the conversion function needs to be applied to every # filename component add a map function and a function to prepare a component # to have acounter added to it. # If you need to convert another part of the header (e.g. uid) search for the # string HEADERMOD in this file. do 'getopts.pl' || die "$0: Unable to find getopts library: $!\n"; &Getopts('mncaf:t:s:e') || $usage++; $count = 0; $proc = 'copy'; if ($opt_c) { $proc = 'canonic'; $count++; } if ($opt_a) { $proc = 'noabs'; $count++; } if ($opt_f) { $proc = 'from_' . $opt_f; $count++; } if ($opt_t) { $proc = 'to_' . $opt_t; $count++; } if ($opt_s) { $proc = 'usershort'; $usershortlen = $opt_s; $count++; } if ($count > 1) { print STDERR "$0: Only one of -c -a -f -t -s can be specified\n"; $usage++; } if (! eval('&' . $proc . '("foo");')) { print STDERR "$0: Bad option specified $proc\n"; $usage++; } delete $map{'foo'}; if ($usage) { print STDERR "Usage: $0 -n -c -a -s N -f msdos|vms -t msdos|v7"; print STDERR " -n Do not work on tar files. Read and print a list of file names. -c Canonicalise filenames by removing /../, /./ and //. -a Fix absolute filenames by removing leading /. -s Convert filenames to the specified length N. -f Convert from format. Format can be msdos or vms. -t Convert to format. Format can be v7 (7th Edition) or msdos. -m Print a map table containing initial and final name on stderr. -e Remove trailing EOF Only one of -c -a -f -t -s can be specified.\n"; exit 1; } if ($opt_n) { while (<>) { s/\n$//; print &$proc($_), "\n"; } } else { ©tar(); } if ($opt_m) { while (($from, $to) = each(%map)) { print STDERR "$from $to\n"; } } exit 0; # Remove absolute file names. # We canonicalise since foo//bar is /bar on many Unixes sub noabs { local($name) = $_[0]; $name = &canonic($name); $name =~ s/^\///; return $name; } # Convert to MS-DOS # - Shorten name to 8 characters # - Remove all dots, but the last one # - Shorten extension to 3 characters # - Convert ,=+<>|; *?:[]\" to ^ # - Convert device name (con, aux ...) to _device sub to_msdos { local($nm); $nm = $_[0]; return &filemap($nm, 'mapmsdos', 'countprepmsdos'); # The following line fails on perl 3.0 patchlevel 18 XXX # return &filemap($_[0], 'mapmsdos', 'countprepmsdos'); } # Shortening routine for MS-DOS sub mapmsdos { local($name) = $_[0]; local($ext); # Leave only the last dot while ($name =~ s/(.*)\.(.*)\.(.*)/\1_\2.\3/g) { ; } # Convert funny characters to ^ $name =~ s/[,=+<>|; *?:[\]\\]/^/g; # Shorten the name if ($name =~ m/\./) { ($name, $ext) = split(/\./, $name); $name =~ s/^((con)|(aux[1-4]?)|(prn)|(lpt[1-3])|(clock\$))$/_\1/i; return &shorten($name, 8) . '.' . &shorten($ext, 3); } else { $name =~ s/^((con)|(aux[1-4]?)|(prn)|(lpt[1-3])|(clock\$))$/_\1/i; return &shorten($name, 8); } } # Count preparation routine for MS-DOS sub countprepmsdos { local($name) = $_[0]; local($ext); if ($name =~ m/\./) { ($name, $ext) = split(/\./, $name); return $name . '.' . substr($ext, 0, 1); } else { return $name . '.'; } } # Convert to 7th Edition type filesystems # - Shorten filenames to 14 characters sub to_v7 { local($nm); $nm = $_[0]; return &filemap($nm, 'mapv7', 'countprepv7'); } # Shortening routine for V7 sub mapv7 { return &shorten($_[0], 14); } # Count preparation routine for V7 sub countprepv7 { return substr($_[0], 0, 12); } # Shorten the filename components by a user specified amount sub usershort { local($nm); $nm = $_[0]; return &filemap($nm, 'mapusershort', 'countprepusershort'); } # Shortening routine for usershort sub mapusershort { return &shorten($_[0], $usershortlen); } # Count preparation routine for usershort sub countprepusershort { return substr($_[0], 0, $usershortlen); } # Convert from VMS # - Convert uppercase to lowercase # - Remove leading device name: or node:: # - Convert directory form [xxx] to xxx/ # - Remove trailing generation number # - Remove quoting characters ^V and " (XXX) # NOTE: I am an ignorant on VMS, so this probably need fixing. UNTESTED sub from_vms { local($name) = $_[0]; $name =~ tr/[A-Z]\\/[a-z]\//; $name =~ s/^[a-z]*::?//; $name =~ s/\[(.*)\](.*)/\1\/\2/; $name =~ s/;[0-9]+$//; $name =~ s/["\026]//g; return $name; } # Convert from MS-DOS # - Convert \ to / # - Convert uppercase to lowercase # - Remove leading device names sub from_msdos { local($name) = $_[0]; $name =~ tr/[A-Z]\\/[a-z]\//; $name =~ s/^[a-z]://; return $name; } # filemap(name, mapfunc, countprepfunc) # Go through every path element of name substituting it with the result # of mapfunc(element). If the filename is already used then substitute it # with the result of applying countprepfunc to with a two letter counter # appended. # Two associative arrays are kept to avoid the chance of re-using a name # %map contains the mappings from big names to small names # %used contains 1 for every short name that has been used # We keep partial file names to speed up the process # The filenames are always canonicalised sub filemap { local( @big, # Contains components of original @small, # Result is built in here @bigpart, # Part of big that has been done @s, # To try alternative mappings $name, # Part of path we are dealing with $count, # To create distinct names $try, # Remember map result $mapfunc, # Function to create new elements $countprepfunc # Function to prepare for counting ); $mapfunc = $_[1]; $countprepfunc = $_[2]; @big = split(/\//, &canonic($_[0])); @small = @bigpart = (); while (defined($name = shift(@big))) { push(@bigpart, $name); if (defined($try = $map{join('/', @bigpart)})) { # Found in map @small = split(/\//, $try); # The next line is needed because of buggy split # split(/x/, '') should give ('') not () @small = ('') if $#small == -1; } else { # Create new map # Even if the name is short we may have used it up # by shortening up a bigger one, so we may have to # count $name = &$mapfunc($name); $count = ''; while ($used{join('/', @s = (@small, $name . $count))}) { if ($count eq '') { $name = &$countprepfunc($name); $count = 'AA'; } else { $count++; } } @small = @s; $used{join('/', @small)} = 1; $map{join('/', @bigpart)} = join('/', @small); } } return join('/', @small); } # # Convert a single string to something close to it with length up # to length given sub shorten { local($str, $len) = @_; # Do "fonetic speling" from end to beginning while ( length($str) > $len && ( $str =~ s/(.*)([fglmnprst])\2(.*)/\1\2\3/i || $str =~ s/(.*)(ou)(.*)/\1u\3/i || $str =~ s/(.*)(ck)(.*)/\1k\3/i || $str =~ s/(.*)(ks)(.*)/\1x\3/i || $str =~ s/(.*)(sh)(.*)/\1s\3/i || $str =~ s/(.*)(ph)(.*)/\1f\3/i || $str =~ s/(.*)(oo)(.*)/\1u\3/i ) ) { ; } # Remove lowercase vowels from the end to the beginning while ( length($str) > $len && $str =~ s/(.*)[aeiou](.*)/\1\2/ ) { ; } # Remove uppercase vowels from the end to the beginning while ( length($str) > $len && $str =~ s/(.*)[AEIOU](.*)/\1\2/ ) { ; } # Finally cut characters from the end $str = substr($str, 0, $len); return $str; } # Create a canonic file name out of one containing .. and . # Employ Unix semantics: empty file means root directory. sub canonic { local(@comp, @can); @comp = split(/\//, $_[0]); for ($i = 0; $i <= $#comp; $i++) { if ($comp[$i] eq '.') { ; } elsif ($comp[$i] eq '') { @can = (); push(@can, ''); } elsif ($comp[$i] eq '..') { pop(@can); } else { push(@can, $comp[$i]); } } return join('/', @can); } # A do nothing procedure sub copy { return $_[0]; } # Copy a tape archive from stdin to stdout sub copytar { binmode STDIN; binmode STDOUT; forloop: for(;;) { read(STDIN, $header, 512) == 512 || die "$0: Couldn't read header: $!\n"; if ($header eq "\0" x 512) { last forloop; } ($name, $mode, $uid, $gid, $size, $mtime, $checksum, $linkflag, $linkname) = unpack("a100 A7x A7x A7x A12 A12 a8 a1 a100", $header); # # Header modification code should be put here # HEADERMOD $name =~ s/[\000 ]*//g; $name = &$proc($name); if ($linkflag != "\0") { $linkname =~ s/[\000 ]*//g; $linkname = &$proc($linkname); } # Create dummy header for checksum calculation (checksum is blanks) $hnew = pack("a99x A6a1x A6a1x A6a1x A11a1 A11a1 a8 a1 a99x x255", ($name, $mode, ' ', $uid, ' ', $gid, ' ', $size, ' ', $mtime, ' ', ' ' x 8, $linkflag, $linkname)); $sz = $size; $sz =~ s/ *//g; $sz = oct($sz); $checksum =~ s/ *//g; $checksum = oct($checksum); $newcheck = &check($hnew); # Create the header with the new checksum $hnew = pack("a99x A6a1x A6a1x A6a1x A11a1 A11a1 a6xa1 a1 a99x x255", ($name, $mode, ' ', $uid, ' ', $gid, ' ', $size, ' ', $mtime, ' ', sprintf('%6o', $newcheck), ' ', $linkflag, $linkname)); print STDOUT $hnew; # Copy contents for ($i = 0; $i < $sz; $i += 512) { read(STDIN, $contents, 512) == 512 || die "$0: Couldn't read data: $!\n"; print STDOUT $contents; } #seek(STDIN, (int($sz / 512) + 1) * 512, 1) unless $sz == 0; } # Write EOF if (!$opt_e) { print STDOUT pack("x512", ()); print STDOUT pack("x512", ()); } } # Return checksum for tar header block sub check { $h = $_[0]; local($i, $s); $s = 0; for($i = 0; $i < 512; $i++) { $s += unpack('C', substr($h, $i, 1)); } return $s; } " :end ", 0;