#!/usr/bin/perl
# copyright: (c)2004 Bill "TTK" Moyer
# this software may be distributed, developed, and used without restrictions.
#
# Filesystem listing utility: like du, with many added features

use Fcntl ':mode';

my @MD5_REGEX = ();    # -md5-only= patterns: paths matching any of these get an md5
my @MD5_NOTEX = ();    # -md5-no= patterns: paths matching any of these never get an md5
my %FS = ();           # hash on filesystem mountpoint to reference to hash describing filesystem
my $DISPLAYED_FS = 0;
my %OPT = ();          # every other -name or -name=value option, keyed by name
my @FILE_LIST = ();    # non-option arguments: the paths to list

# Command line parsing.  Any number of leading dashes introduces an option.
foreach my $arg ( @ARGV ) {
    if ( $arg =~ /^\-+md5-only\=(.+)/ ) {
        my $rex = $1;
        $rex = join ( '\.', split ( /\./, $rex ) );    # treat '.' in the pattern literally
        push ( @MD5_REGEX, $rex );
        $OPT{'md5'}      = 1;
        $OPT{'md5-only'} = 1;
    }
    elsif ( $arg =~ /^\-+md5-no\=(.+)/ ) {
        my $rex = $1;
        $rex = join ( '\.', split ( /\./, $rex ) );    # treat '.' in the pattern literally
        push ( @MD5_NOTEX, $rex );
        $OPT{'md5'} = 1;
    }
    elsif ( $arg =~ /^\-+(.+?)\=(.+)/ ) {
        $OPT{$1} = $2;
    }
    elsif ( $arg =~ /^\-+help$/ ) {
        usage();
        exit(0);
    }
    elsif ( $arg =~ /^\-+h$/ ) {
        usage();
        exit(0);
    }
    elsif ( $arg =~ /^\-+(.+)/ ) {
        $OPT{$1} = 1;
    }
    else {
        push ( @FILE_LIST, $arg );
    }
}

$OPT{'b'} = 1 if ( defined($OPT{'a'}) );

# "use" is resolved at compile time, so the module is loaded whether or not
# -md5 was requested; it is therefore written unconditionally.
use Digest::MD5 qw(md5 md5_hex md5_base64);

##############################################################################
# FTYPE stuff here

use POSIX;
use Fcntl qw(:flock O_RDWR O_CREAT O_EXCL O_RDONLY);

# use lib '/petabox/sw/modules';
# use lib '/usr/cluster/modules';
# use DR;

my $DEBUGGING = 0;
$DEBUGGING = $OPT{'d'} if ( defined ( $OPT{'d'} ) );

# Binary signatures: [ type, [ leading bytes of the file ] ]
my @PRESIGS = (
    [ "image/png",      [ 0x89, 0x50, 0x4e, 0x47 ] ],
    [ "image/jpeg",     [ 0xff, 0xd8, 0xff, 0xe0 ] ],
    [ "image/jpeg",     [ 0xff, 0xd8, 0xff, 0xe1 ] ],
    [ "text/pdf",       [ 0x25, 0x50, 0x44, 0x46 ] ],
    [ "text/word",      [ 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1, 0x00 ] ],
    [ "text/word",      [ 0xfe, 0x37, 0x00, 0x23, 0x00 ] ],
    [ "text/word",      [ 0xdb, 0xa5, 0x2d, 0x00 ] ], # specifically: Word2 format (.do2)
    [ "audio/aiff",     [ 0x46, 0x4f, 0x52, 0x4d, 0x00 ] ],
    [ "video/qt",       [ 0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, 0x71, 0x74 ] ],
    [ "audio/mpeg3",    [ 0xff, 0xf3, 0x88, 0xc4, 0x00 ] ],
    [ "audio/wav",      [ 0x52, 0x49, 0x46, 0x46, 0xa5 ] ],
    [ "image/icon",     [ 0x00, 0x00, 0x01, 0x00, 0x01 ] ],
    [ "binary/cdf",     [ 0xcd, 0xf2, 0x60, 0x02, 0x00 ] ],
    [ "archive/gzip",   [ 0x1f, 0x8b, 0x08 ] ],
    [ "image/tiff",     [ 0x4d, 0x4d, 0x00, 0x2a ] ],
    [ "image/tiff",     [ 0x49, 0x49, 0x2a, 0x00 ] ],
    [ "binary/elf",     [ 0x7f, 0x45, 0x4c, 0x46 ] ],
    [ "binary/pp3",     [ 0x0b, 0xad, 0xde, 0xed, 0x00 ] ],
    [ "text/rtf",       [ 0x7b, 0x5c, 0x72, 0x74, 0x66, 0x31 ] ],
    [ "image/pds-ccsd", [ 0x43, 0x43, 0x53, 0x44, 0x33, 0x5a ] ],
    [ "script/sh",      [ 0x23, 0x21, 0x2f, 0x62, 0x69, 0x6e, 0x2f, 0x73, 0x68, 0x0a ] ],
    [ "script/perl",    [ 0x23, 0x21, 0x2f, 0x75, 0x73, 0x72, 0x2f, 0x62, 0x69, 0x6e, 0x2f, 0x70, 0x65, 0x72, 0x6c, 0x0a ] ],
    [ "video/mpeg2",    [ 0x00, 0x00, 0x01, 0xb3 ] ]
);

my %PREFIXES = (); # maps first byte of bitstring to hash on raw bitstring to MIME type
foreach my $par ( @PRESIGS ) {
    my ( $type, $ar ) = @{$par};
    my $x  = join ( '', map { chr($_) } @{$ar} );
    my $px = substr ( $x, 0, 1 );
    $PREFIXES{$px}->{$x} = $type;
}

# Regex signatures: [ type, pattern, modifier ('i' for case-insensitive) ]
my @REXSIGS = (
    [ 'image/gif', '^GIF.*?\\000',        ''  ],
    [ 'text/ps',   '\%\!PS\-Adobe',       'i' ],
    [ 'text/html', '\<\!\-\-/',           ''  ],
    [ 'text/css',  '\/\* CSS Document',   ''  ],
    [ 'text/css',  '^\s*[\#\w]+\s+\{',    'i' ],
    [ 'text/html', '\<\!DOCTYPE HTML',    'i' ],
    # NOTE(review): this pattern was garbled (it read '\', which does not
    # compile); reconstructed as an opening <html tag -- confirm against the
    # pristine source.
    [ 'text/html', '\<html',              'i' ],
    [ 'text/xml',  '\<\?xml',             'i' ]
);

# this hash maps "our" file types to MIME compatible file types
my %TYPE2MIME = (
    "text/pdf"       => "application/pdf",
    "text/word"      => "application/msword",
    "audio/aiff"     => "audio/x-aiff",
    "video/qt"       => "video/quicktime",
    "audio/mpeg3"    => "audio/mpeg",
    "audio/wav"      => "audio/x-wav",
    "image/icon"     => "image/x-icon",
    "binary/cdf"     => "application/x-netcdf",
    "archive/gzip"   => "application/x-gzip",
    "binary/pp3"     => "application/octet-stream",
    "image/pds-ccsd" => "application/octet-stream",
    "script/sh"      => "application/x-sh",
    "video/mpeg2"    => "video/mpeg",
    "text/ps"        => "application/postscript",
    # NOTE(review): the value for this key is missing in the source; it is kept
    # as an explicit undef -- confirm the intended MIME type.
    "executable/static-library" => undef,
);

# Maps a lower-cased filename suffix to "our" file type.
my %SUFFIXES = (
    "a"=>"executable/static-library", "aif"=>"audio/aiff", "ar"=>"executable/library-archive", "arc"=>"archive/arc",
    "arj"=>"archive/arc", "asc"=>"text/plain", "asm"=>"source/assembly", "asp"=>"source/active",
    "aspx"=>"source/active", "atom"=>"source/sh", "avi"=>"video/avi", "awk"=>"source/awk",
    "bak"=>"archive/backup", "bash"=>"source/bash", "bat"=>"source/batch", "bin"=>"binary/executable",
    "bmp"=>"image/bitmap", "bu"=>"archive/backup", "bu2"=>"archive/backup", "bup"=>"archive/backup",
    "bup0"=>"archive/backup", "bz2"=>"archive/bzip2", "c32"=>"digest/crc32", "cc"=>"source/cplusplus",
    "cdx"=>"archive/cdx", "cf"=>"source/config", "cfg"=>"source/config", "cfg0"=>"source/config",
    "cfg1"=>"source/config", "cgi"=>"executable/cgi", "cnf"=>"source/config", "code"=>"source/unknown",
    "conf"=>"source/config", "cor"=>"text/plain", "cpp"=>"source/cplusplus", "cron"=>"source/config",
    "csh"=>"source/cshell", "css"=>"text/css", "csv"=>"text/csv", "cvs"=>"archive/cvs",
    "cx88"=>"source/cplusplus", "cxx"=>"source/cplusplus", "dat"=>"binary/data", "data"=>"binary/data",
    "db"=>"source/sql", "dbm"=>"archive/berkeleydb", "dcl"=>"source/dcl", "ddl"=>"application/doodle",
    "def"=>"source/unknown", "defs"=>"source/make", "deny"=>"source/config", "dep"=>"source/make",
    "deps"=>"source/make", "desc"=>"text/plain", "df"=>"source/make", "dft"=>"source/config",
    "dia"=>"application/dia", "dict"=>"archive/dictionary", "dif"=>"source/patch", "diff"=>"source/patch",
    "dir"=>"manifest/directory", "dirs"=>"manifest/directory", "disk"=>"archive/disk", "dist"=>"source/unknown",
    "dj"=>"source/unknown", "dj2"=>"source/unknown", "djvu"=>"text/djvu", "dll"=>"executable/dynamic-library",
    "dlls"=>"executable/dynamic-library", "dns"=>"source/config", "do2" => "text/word", "doc"=>"text/word",
    "dot"=>"application/dot", "drm"=>"application/drm", "dsc"=>"text/plain", "dsk"=>"archive/disk",
    "dst"=>"source/unknown", "dtml"=>"text/dtml", "du"=>"manifest/du", "dump"=>"text/unknown",
    "dvd"=>"archive/disk", "dvi"=>"text/tex", "elf"=>"executable/unknown", "env"=>"source/config",
    "eps"=>"text/postscript", "err"=>"text/plain", "exe"=>"executable/msdos", "f90"=>"source/fortran",
    "faq"=>"text/plain", "fcgi"=>"executable/cgi", "fig"=>"application/xfig", "flac"=>"audio/flac",
    "flv"=>"video/shockwave", "frm"=>"text/plain", "gif"=>"image/gif", "gpg"=>"application/gpg",
    "gpl"=>"text/plain", "gz"=>"archive/gzip", "hash"=>"text/hash", "hpfs"=>"archive/disk",
    "hqx"=>"archive/hqx", "htm"=>"text/html", "html"=>"text/html", "hxx"=>"source/cplusplus",
    "i"=>"source/c", "icbm"=>"source/config", "icn"=>"image/icon", "icns"=>"image/icon",
    "ico"=>"image/icon", "icon"=>"image/icon", "img"=>"archive/disk", "inc"=>"source/php",
    "incl"=>"source/m4", "inf"=>"source/config", "info"=>"text/info", "ini"=>"source/config",
    "iso"=>"archive/disk", "jar"=>"archive/java", "jav"=>"source/java", "java"=>"source/java",
    "jawt"=>"source/config", "jaxp"=>"source/config", "jfs"=>"archive/disk", "jp"=>"image/jpeg",
    "jp2"=>"image/jpeg", "jpeg"=>"image/jpeg", "jpg"=>"image/jpeg", "js"=>"source/javascript",
    'json'=> "text/json", "jsp"=>"source/javascript", "jspx"=>"source/javascript", "jv"=>"source/java",
    "kde"=>"source/config", "key"=>"source/config", "keys"=>"source/config", "ksh"=>"source/korn",
    "ld"=>"source/linker", "ldap"=>"archive/ldap", "lex"=>"source/lex", "lgpl"=>"text/plain",
    "lib"=>"executable/library-archive", "lisp"=>"source/lisp", "lnk"=>"source/config", "lnx"=>"source/config",
    "lo"=>"executable/linkable", "lock"=>"system/lock", "log"=>"text/log", "m"=>"source/modula2",
    "m2"=>"source/modula2", "m2v"=>"video/mpeg2", "m3u"=>"audio/mpeg3", "m4"=>"video/mpeg4",
    "m4a"=>"video/mpeg4", "m4f"=>"video/mpeg4", "m4n"=>"video/mpeg4", "m4v"=>"video/mpeg4",
    "mail"=>"application/mail", "mak"=>"source/make", "make"=>"source/make", "mbox"=>"application/mail",
    "md"=>"source/machine", "md5"=>"digest/md5", "menu"=>"source/config", "miff"=>"image/miff",
    "mime"=>"archive/mime", "mnu"=>"source/config", "mov"=>"video/qt", "moz"=>"source/config",
    "mp2"=>"video/mpeg2", "mp3"=>"audio/mpeg3", "mp4"=>"video/mpeg4", "mpeg"=>"video/mpeg2",
    "mpg"=>"video/mpeg1", "news"=>"text/usenet", "nfo"=>"source/config", "nfsd"=>"archive/disk",
    "nn"=>"text/html", "nntp"=>"text/usenet", "note"=>"text/plain", "nrpe"=>"source/config",
    "o"=>"executable/linkable", "obj"=>"executable/linkable", "ogg"=>"audio/ogg", "opt"=>"source/config",
    "orig"=>"archive/original", "out"=>"text/log", "out2"=>"text/log", "ovr"=>"executable/overlay",
    "p"=>"source/pascal", "p2"=>"source/config", "pas"=>"source/pascal", "pbm"=>"image/pbm",
    "pdf"=>"text/pdf", "perl"=>"source/perl", "pgp"=>"source/config", "ph"=>"source/php",
    "php"=>"source/php", "php3"=>"source/php", "pic"=>"image/pic", "pict"=>"image/pict",
    "pid"=>"system/pid", "pine"=>"application/mail", "pkg"=>"archive/package", "pl"=>"source/perl",
    "pm"=>"source/perl", "png"=>"image/png", "pnm"=>"image/pnm", "pod"=>"text/pod",
    "pods"=>"manifest/pods", "ppm"=>"image/ppm", "pps"=>"application/office", "ppt"=>"application/powerpoint",
    "prj"=>"application/project", "prl"=>"source/prolog", "pro"=>"source/prolog", "proj"=>"application/project",
    "ps"=>"text/postscript", "pxe"=>"source/config", "py"=>"source/python", "pyc"=>"binary/python",
    "pyo"=>"binary/python", "pyw"=>"binary/python", "qd"=>"text/quickdump", "qnx"=>"archive/qnx",
    "qq"=>"source/config", "r"=>"source/c", "ram"=>"audio/realmedia", "rar"=>"archive/rar",
    "real"=>"audio/realmedia", "rm"=>"audio/realmedia", "rmvb"=>"audio/realmedia", "ro"=>"source/config",
    "rom"=>"source/config", "rpm"=>"archive/rpm", "rr"=>"source/borne", "rrd"=>"application/rrdtool",
    "rsa"=>"source/config", "rsrc"=>"binary/resource", "rtf"=>"text/rich", "s"=>"source/assembly",
    "sav"=>"application/freeciv", "sbin"=>"executable/executable", "scr"=>"source/config", "scrc"=>"source/config",
    "sed"=>"source/sed", "sgm"=>"text/sgml", "sgml"=>"text/sgml", "sh"=>"source/sh",
    "sha1"=>"digest/sha1", "shn"=>"audio/shorten", "shnf"=>"audio/shorten", "sig"=>"text/plain",
    "sit"=>"archive/stuffit", "skip"=>"source/config", "slk"=>"archive/silk", "sml"=>"text/xml",
    "smp"=>"source/config", "smtp"=>"application/mail", "snd"=>"audio/sound", "so"=>"executable/dynamic-library",
    "sock"=>"system/socket", "sort"=>"manifest/sort", "sp"=>"source/config", "spec"=>"application/freeciv",
    "spt"=>"binary/spt", "sq"=>"source/sql", "sql"=>"source/sql", "src"=>"source/cplusplus",
    "ssh"=>"source/config", "sshd"=>"source/config", "stat"=>"text/log", "stop"=>"system/stop",
    "sv"=>"application/subversion", "svg"=>"image/svg", "svn"=>"application/subversion", "sw"=>"video/shockwave",
    "swf"=>"video/shockwave", "swg"=>"source/swig", "sym"=>"system/symlink", "syn"=>"source/config",
    "sync"=>"source/config", "sys"=>"application/mysql", "t"=>"source/test", "tab"=>"source/config",
    "tar"=>"archive/tar", "tbl"=>"source/config", "tbz2"=>"archive/bzip2", "tcl"=>"source/tcl",
    "tcsh"=>"source/tcshell", "tex"=>"text/tex", "texi"=>"text/tex", "text"=>"text/plain",
    "tf"=>"source/config", "tfrc"=>"source/config", "tgz"=>"archive/gzip", "tif"=>"image/tiff",
    "tiff"=>"image/tiff", "tk"=>"source/make", "tlog"=>"system/log", "tmp"=>"system/temporary",
    "tmpl"=>"source/make", "tpl"=>"source/make", "tr"=>"source/config", "trm"=>"source/c",
    "ttf"=>"binary/font", "txt"=>"text/plain", "txt2"=>"text/plain", "url"=>"text/url",
    "urls"=>"text/url", "utf"=>"text/utf8", "utf8"=>"text/utf8", "uu"=>"archive/uuencoded",
    "uue"=>"archive/uuencoded", "vbs"=>"source/visualbasic", "vid"=>"video/video", "wad"=>"application/doom",
    "war"=>"archive/zip", "wav"=>"audio/wav", "wc"=>"source/config", "win"=>"source/make",
    "wma"=>"audio/wman", "wmv"=>"video/wmv", "x"=>"source/linker", "xbm"=>"source/xbm",
    "xfs"=>"archive/disk", "xls"=>"application/excel", "xml"=>"text/xml", "xpm"=>"image/pixmap",
    "xpt"=>"binary/xpt", "xs"=>"source/config", "xsl"=>"source/xml", "xslt"=>"source/xml",
    "xul"=>"source/xul", "xvim"=>"application/vim", "y"=>"source/yacc", "yacc"=>"source/yacc",
    "yaml"=>"source/yaml", "yml"=>"source/yaml", "yy"=>"source/yacc", "z"=>"archive/compress",
    "zip"=>"archive/zip"
);

my $TRY_SO_HARD = $OPT{'try'} || 'hardest';
my $PUNT = 1;
$PUNT = 0 if ( defined ( $OPT{'no-punt'} ) );

# END FTYPE
##############################################################################

my $EXPLICITDOT = 1;
my $USE_MD5SUM_BIN_THRESHOLD = $OPT{'md5-lvl'} || 10000000; # 10MB
my $ID_F = $OPT{'fsid'} || '';
$ID_F = '.fs_id' if ( $ID_F eq '1' );

if ( scalar(@FILE_LIST) < 1 ) {
    push ( @FILE_LIST, '.' );
    $EXPLICITDOT = 0;
}

my $OPT_FILESYSTEMS = 0;
$OPT_FILESYSTEMS = 1 if ( defined($OPT{'fsid'}) );                # include filesystem id in output
$OPT_FILESYSTEMS = 1 if ( defined($OPT{'local-filesystems'}) );   # exclude nfs-mounted
$OPT_FILESYSTEMS = 1 if ( defined($OPT{'normal-filesystems'}) );  # exclude /sys, /proc, /dev
$OPT_FILESYSTEMS = 1 if ( defined($OPT{'display-filesystems'}) ); # prefix output with #DF attribs

# Stays 1 while every mountpoint is a single top-level directory, which lets
# dy() find a path's filesystem from its first path component alone.
my $OPTIMIZE_FILESYSTEMS = 1;

if ( ( $OPT_FILESYSTEMS == 1 ) && ( -x '/bin/df' ) ) {
    my $buf  = "";
    my $n_fs = 0;
    foreach my $x ( `/bin/df -kT` ) {
        chomp ( $x );
        next if ( $x =~ /^Filesystem/ );
        # A line holding a single word is remembered and glued onto the
        # following (whitespace-led) line before the record is split.
        if ( $x =~ /^[^\s]+$/ ) {
            $buf = $x;
            next;
        }
        if ( $x =~ /^\s/ ) {
            $x   = "$buf $x";
            $buf = "";
        }
        $n_fs++;
        my ( $fs, $ty, $blk, $used, $avail, $perc_used, $mp ) = split ( /\s+/, $x, 7 );
        my $fsid = '(undef)';
        unless ( defined ( $mp ) ) {
            $x = join ( ' ', split ( /[\r\t\n\s]+/s, $x ) );
            print ("#WARNING /bin/df:$n_fs malformed line [$x]\n");
            next;
        }
        if ( defined ( $OPT{'fsid'} ) ) {
            if ( -e "$mp/$ID_F" ) {
                if ( -r "$mp/$ID_F" ) {
                    $fsid = rd_f ( "$mp/$ID_F" );
                    $fsid = '(error4)' if ( $fsid =~ /^ERROR/ );
                    chomp ( $fsid );
                }
                else {
                    $fsid = '(unk)';
                }
            }
            else {
                $fsid = '(none)';
            }
        }
        $perc_used = $1 if ( $perc_used =~ /(\d+)/ );
        $OPTIMIZE_FILESYSTEMS = 0 if ( $mp =~ /^\/.*?\// );
        $FS{$mp} = {
            _fs => $fs, _type => $ty, _blocks => $blk, _used => $used,
            _available => $avail, _pu => $perc_used, _mp => $mp, _fsid => $fsid
        };
    }

    # Pseudo-filesystems are always registered with type 'sys' so that
    # -normal-filesystems can skip them.
    $FS{'/dev'}  = { _fs=>'dev',  _type=>'sys', _blocks=>0, _used=>0, _available=>0, _pu=>0, _mp=>'/dev',  _fsid=>'(none)' };
    $FS{'/sys'}  = { _fs=>'sys',  _type=>'sys', _blocks=>0, _used=>0, _available=>0, _pu=>0, _mp=>'/sys',  _fsid=>'(none)' };
    $FS{'/proc'} = { _fs=>'proc', _type=>'sys', _blocks=>0, _used=>0, _available=>0, _pu=>0, _mp=>'/proc', _fsid=>'(none)' };

    if ( defined ( $OPT{'display-filesystems'} ) ) {
        my $hsh = 0;
        $hsh = 1 if ( defined($OPT{'hash'}) );
        $hsh = 1 if ( defined($OPT{'hash-filesystems'}) );
        # [ tag used in hash format, key in the filesystem record ], in output order
        my @df_fields = (
            [ 'fs', '_fs' ],        [ 'ty', '_type' ], [ 'bl', '_blocks' ], [ 'us', '_used' ],
            [ 'av', '_available' ], [ 'pu', '_pu' ],   [ 'id', '_fsid' ],   [ 'mp', '_mp' ]
        );
        foreach my $fs_k ( sort ( keys ( %FS ) ) ) {
            my $r   = $FS{$fs_k};
            my $txt = "#DF";
            foreach my $field ( @df_fields ) {
                my ( $tag, $key ) = @{$field};
                $txt .= $hsh ? "\t$tag=" : " ";
                $txt .= $r->{$key};
            }
            $txt .= "\n";
            print ( $txt );
        }
    }
}
else {
    print ("#WARNING cannot execute /bin/df, disallowing filesystem-savvy operations\n") unless ( $OPT_FILESYSTEMS == 0 );
    $OPT_FILESYSTEMS = 0;
}

foreach my $x ( @FILE_LIST ) {
    dy ( $x );
}
exit(0);

# dy ( $FPATH, $depth )
#   Print one listing record for $FPATH and, when it is a directory, recurse
#   into its entries until -maxdepth (default 200; 1 with -nr) is reached.
#   $depth is the current recursion depth and defaults to 1.
#   Symlinks are reported as type 'l' and are never descended into.
#   Returns nothing; all output goes to STDOUT.
sub dy
{
    my ( $FPATH, $depth ) = @_;
    $depth = $depth || 1;
    my $maxdepth = $OPT{'maxdepth'} || 200;
    $maxdepth = 1 if ( defined($OPT{'nr'}) );
    my ( @S, $BYTES, $KB );
    my $TY = 'f';
    my $LINK;
    my $PAD_SLEEP = 0;
    my %h = ();          # optional extra columns: ct (content type), dh/dc (hex/char dump)
    my $fs_r;
    my $fs_id = '';

    if ( $OPT_FILESYSTEMS == 1 ) {
        if ( $OPTIMIZE_FILESYSTEMS == 1 ) {
            # yay! we can use the fast/easy lookup method!
            my $mp = ( $FPATH =~ /^(\/[^\/]*)/ ) ? $1 : '/';
            $fs_r  = $FS{$mp} || $FS{'/'};
            $fs_id = $fs_r->{_fsid} || '(error1)';
        }
        else {
            # Longest mountpoint first, so nested mounts win over their
            # parents regardless of hash ordering.  Plain string comparison
            # avoids treating the mountpoint as a regex.
            foreach my $fs_mp ( sort { length($b) <=> length($a) } keys ( %FS ) ) {
                my $prefix = $fs_mp;
                $prefix .= '/' unless ( $prefix =~ /\/$/ );
                next unless ( ( $FPATH eq $fs_mp ) || ( index ( $FPATH, $prefix ) == 0 ) );
                $fs_r  = $FS{$fs_mp};
                $fs_id = $fs_r->{_fsid} || '(error2)';
                last;
            }
        }
        unless ( defined ( $fs_r ) ) {
            $fs_r  = $FS{'/'};
            $fs_id = '(error3)';
        }
        return if ( defined($OPT{'local-filesystems'} ) && ( $fs_r->{_type} eq 'nfs' ) );
        return if ( defined($OPT{'normal-filesystems'}) && ( $fs_r->{_type} eq 'sys' ) );
    }

    if ( $LINK = readlink ($FPATH) ) {
        $TY = 'l';
        if ( defined($OPT{'ct'}) || defined($OPT{'mime'}) ) {
            $h{'ct'} = 'special/symlink';
        }
        if ( defined($OPT{'dump'}) ) {
            my $n = $OPT{'dump'};
            $n = 16 if ( $n == 1 );
            $h{'dh'} = ' 'x($n*3-1);
            $h{'dc'} = ' 'x$n;
        }
    }

    if ( $TY eq 'f' ) {
        if ( !-e $FPATH ) {
            print ("ERROR no file '$FPATH'\n");
            return;
        }
        @S = stat ( _ ); # returns stat from previous -x operation
        if    ( S_ISDIR  ($S[2]) ) { $TY = 'd'; }
        elsif ( S_ISBLK  ($S[2]) ) { $TY = 'b'; }
        elsif ( S_ISCHR  ($S[2]) ) { $TY = 'c'; }
        elsif ( S_ISFIFO ($S[2]) ) { $TY = 'p'; }
        elsif ( S_ISSOCK ($S[2]) ) { $TY = 's'; }
        if ( ( $S[2] & 0111 ) && ( $TY eq 'f' ) && !defined($OPT{'nox'}) ) {
            $TY = 'x';
        }
        my $is_regular = ( ( $TY eq 'f' ) || ( $TY eq 'x' ) );

        if ( defined($OPT{'ct'}) || defined($OPT{'mime'}) ) {
            my %special_ct = (
                'd' => 'special/dir',  'b' => 'special/block', 'c' => 'special/char',
                'p' => 'special/fifo', 's' => 'special/socket'
            );
            if ( $is_regular ) {
                my $ftype = type_file ( $FPATH, $TRY_SO_HARD, $PUNT );
                $ftype = substr ( $ftype, 0, $OPT{'ct'} ) if ( $OPT{'ct'} > 1 );
                $h{'ct'} = $ftype;
            }
            elsif ( defined ( $special_ct{$TY} ) ) {
                # The dump columns for special files are blank-filled by the
                # common fallback below.
                $h{'ct'} = $special_ct{$TY};
            }
        }

        # Only regular files are opened for -dump: reading a FIFO or a
        # device node could block or have side effects.
        if ( defined($OPT{'dump'}) && !defined($h{'dh'}) && $is_regular ) {
            $OPT{'dump'} = 16 if ( $OPT{'dump'} == 1 );
            my $want = $OPT{'dump'};
            my $fd;
            if ( open ( $fd, '<', $FPATH ) ) {
                my $buf = '';
                sysread ( $fd, $buf, $want );
                close ( $fd );
                $buf = '' unless ( defined ( $buf ) );
                my $hbuf = join ( ' ', unpack ( '(H2)*', $buf ) );    # "xx xx xx ..."
                ( my $pbuf = $buf ) =~ tr/\x20-\x7e/./c;              # non-printables become '.'
                $hbuf .= ' 'x(($want*3-1)-length($hbuf));
                $pbuf .= ' 'x( $want     -length($pbuf));
                $h{'dh'} = $hbuf;
                $h{'dc'} = $pbuf;
            }
        }
    }
    else {
        # Symlink: report the stat of its target when that exists.
        if ( -e $FPATH ) {
            @S = stat ( _ );
        }
        else {
            @S = ( 0, 0, 0777, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 ); # FIXME - do something more reasonable
        }
    }

    # Anything that asked for a dump but produced none gets blank columns ...
    if ( defined($OPT{'dump'}) && !defined($h{'dh'}) ) {
        my $n = $OPT{'dump'};
        $n = 16 if ( $n < 2 );
        $h{'dh'} = ' 'x($n*3-1);
        $h{'dc'} = ' 'x$n;
    }
    # ... except in hash format, where blank dump fields are simply omitted.
    if ( defined($OPT{'dump'}) && defined($OPT{'hash'}) && ($h{'dh'} =~ /^\s+$/) ) {
        delete $h{'dh'};
        delete $h{'dc'};
    }

    my $columnar = !defined ( $OPT{'hash'} );    # fixed-width padding only outside hash format
    $BYTES = $S[7];
    $KB    = $BYTES;
    if ( defined($OPT{'b'}) ) {
        $KB = sprintf ( '%10s', $KB ) if ( $columnar );
    }
    else {
        $KB = int (($KB / 1000) + 0.5);
        $KB = sprintf ( '%7s', $KB ) if ( $columnar );
    }
    my $PER = sprintf ( "%03o", $S[2] & 0777 );
    my $UID = $S[4];
    $UID = sprintf ( '%4s', $UID ) if ( $columnar );
    my $LI = $S[3];
    $LI = sprintf ( '%2s', $LI ) if ( $columnar );
    my $TIM = $S[9];
    my $INO = sprintf ( "%08X", $S[1] );
    $FPATH .= '/' if ( ( $TY eq 'd' ) && ( $FPATH !~ /\/$/ ) );
    my $PRF = $FPATH;
    $PRF = $1 if ( $PRF =~ /^\.\/(.+)/ );

    my $md5_length   = $OPT{'md5-len'} || 32;
    my $CONTENT_HASH = '-'x$md5_length;
    if (    defined ( $OPT{'md5'} )
         && ( !defined ( $OPT{'md5-size'} ) || ( $BYTES <= $OPT{'md5-size'} ) )
         && ( ( $TY eq 'f' ) || ( $TY eq 'x' ) ) )
    {
        my $go_ahead = 1;
        $go_ahead = 0 if ( defined($OPT{'md5-only'}) );
        foreach my $rex ( @MD5_REGEX ) {
            next unless ( $FPATH =~ /$rex/i );
            $go_ahead = 1;
            last;
        }
        foreach my $rex ( @MD5_NOTEX ) {
            next unless ( $FPATH =~ /$rex/i );
            $go_ahead = 0;
            last;
        }
        if ( $go_ahead == 1 ) {
            my $md5_btime = time();
            # Small files are digested in-process.  So are paths containing
            # characters that are unsafe inside the shell command line below,
            # and everything when /usr/bin/md5sum is not available.
            if (    ( $BYTES < $USE_MD5SUM_BIN_THRESHOLD )
                 || ( !-x '/usr/bin/md5sum' )
                 || ( $FPATH =~ /[\'\$\!]/ ) )
            {
                my $fh;
                if ( open ( $fh, '<', $FPATH ) ) {
                    my $buf;
                    my $res = sysread ( $fh, $buf, $BYTES );
                    if ( defined ( $res ) && ( $res >= $BYTES ) ) {
                        $CONTENT_HASH = md5_hex($buf);
                    }
                    close ( $fh );
                }
            }
            else {
                my $buf = `/usr/bin/md5sum '$FPATH' 2>\&1`;
                my $res = $?;
                if ( $res == 0 ) {
                    $buf = $1 if ( $buf =~ /^([^\s]+)/ );
                    $CONTENT_HASH = $buf;
                }
            } # END of if..else
            my $md5_dtime = time() - $md5_btime;
            if ( defined($OPT{'pad-md5-time'}) ) {
                # Either a number of seconds, or "<n>p" / "<n>%": a percentage
                # of the time the digest just took.
                my $pmod = $OPT{'pad-md5-time'};
                $pmod = int ( $md5_dtime * ( $1 / 100 ) ) if ( $pmod =~ /^([\.\d]+)[pP\%]/ );
                $PAD_SLEEP = int($pmod);
            }
        } # END of if $go_ahead
    } # END of if md5

    if ( $EXPLICITDOT || ( $PRF ne './' ) ) {
        my $txt = pr_dy ( $TIM, $KB, $TY, $PER, $UID, $LI, $INO, $PRF, $CONTENT_HASH, $fs_id, \%h );
        print ( $txt );
    }
    sleep ( $PAD_SLEEP ) if ( $PAD_SLEEP > 0 );
    $PAD_SLEEP = 0;

    return if ( $depth >= $maxdepth );
    if ( $TY eq 'd' ) {
        my $dh;
        return if ( !opendir ( $dh, $FPATH ) );
        my $x;
        while ( defined ( $x = readdir($dh) ) ) {
            next if ( $x eq '.' );
            next if ( $x eq '..' );
            dy ( "$FPATH$x", $depth+1 );
        }
        closedir ( $dh );
    }
}

# pr_dy ( $TIM, $KB, $TY, $PER, $UID, $LI, $INO, $PRF, $MD5, $fs_id, $more_hr )
#   Format one listing record and return it as a newline-terminated string.
#   $TIM is an epoch time, rendered as hex (-tx, -xtime, -hextime, -min),
#   decimal (-td, -dx, -dtime, -dectime) or "YYYY-MM-DD hh:mm:ss" local time.
#   $more_hr is an optional hash reference of extra columns, emitted in
#   sorted key order.  With -hash the record is "path<TAB>key=value..."; in
#   the default format it is space-separated columns with the path last.
sub pr_dy
{
    my ( $TIM, $KB, $TY, $PER, $UID, $LI, $INO, $PRF, $MD5, $fs_id, $more_hr ) = @_;
    my $txt = '';
    $MD5 = '----------------------' unless ( defined ( $MD5 ) );

    if ( defined($OPT{'tx'}) || defined($OPT{'xtime'}) || defined($OPT{'hextime'}) || defined($OPT{'min'}) ) {
        $TIM = sprintf ( '%08X', $TIM );
    }
    elsif ( defined($OPT{'td'}) || defined($OPT{'dx'}) || defined($OPT{'dtime'}) || defined($OPT{'dectime'}) ) {
        $TIM = sprintf ( '%d', $TIM );
    }
    else {
        my @t = localtime($TIM);
        $TIM = sprintf ( '%d-%02d-%02d %02d:%02d:%02d', $t[5]+1900, $t[4]+1, $t[3], $t[2], $t[1], $t[0] );
    }

    if ( defined ( $OPT{'md5-len'} ) ) {
        $MD5 = substr ( $MD5, -1 * $OPT{'md5-len'} );    # keep the trailing md5-len characters
    }

    my $minimal     = defined($OPT{'min'});
    my $show_perms  = ( !defined($OPT{'noperms'}) && !$minimal );
    my $show_uid    = ( !defined($OPT{'nouid'})   && !$minimal );
    my $show_links  = ( !defined($OPT{'nolink'})  && !$minimal );
    my $show_inode  = ( defined($OPT{'i'}) || defined($OPT{'inode'}) || defined($OPT{'inodes'}) );

    if ( defined($OPT{'hash'}) ) {
        $txt  = "$PRF\ttm=$TIM\tsz=$KB\tty=$TY\t";
        $txt .= "m5=$MD5\t"   if ( defined($OPT{'md5'}) && ( $MD5 !~ /^\-/ ) );
        $txt .= "fi=$fs_id\t" if ( defined($OPT{'fsid'}) );
        $txt .= "pm=$PER\t"   if ( $show_perms );
        $txt .= "ui=$UID\t"   if ( $show_uid );
        $txt .= "li=$LI\t"    if ( $show_links );
        $txt .= "in=$INO\t"   if ( $show_inode );
        if ( defined ( $more_hr ) ) {
            foreach my $k ( sort ( keys ( %{$more_hr} ) ) ) {
                my $x   = $more_hr->{$k};
                my $tag = ( $k =~ /^\_+(.+)/ ) ? $1 : $k;    # strip leading underscores from the tag
                $txt .= "$tag=$x\t";
            }
        }
        chop ( $txt );    # drop the trailing tab
        $txt .= "\n";
    }
    else {
        $txt  = "$TIM $KB $TY ";
        $txt .= "$MD5 "   if ( defined($OPT{'md5'}) );
        $txt .= "$PER "   if ( $show_perms );
        $txt .= "$UID "   if ( $show_uid );
        $txt .= "$LI "    if ( $show_links );
        $txt .= "$INO "   if ( $show_inode );
        $txt .= "$fs_id " if ( defined($OPT{'fsid'}) );
        if ( defined ( $more_hr ) ) {
            foreach my $k ( sort ( keys ( %{$more_hr} ) ) ) {
                my $x = $more_hr->{$k};
                $x = sprintf('%-16s',$x) if ( $k eq 'ct' );
                $txt .= "$x ";
            }
        }
        $txt .= "$PRF\n";
    }
    return ( $txt );
}

# rd_f ( $fn )
#   Return the whole content of file $fn as one string ('' for an empty
#   file), or a string starting with "ERROR" when it cannot be opened.
sub rd_f
{
    my ( $fn ) = @_;
    my $fh;
    open ( $fh, '<', $fn ) or return ("ERROR cannot open '$fn'");
    my $buf = do { local $/; <$fh> };
    close ( $fh );
    $buf = '' unless ( defined ( $buf ) );
    return ( $buf );
}

# wr_f ( $fn, $buf )
#   Replace the content of file $fn with $buf.
#   Returns 'OK', or a string starting with "ERROR" on failure.
sub wr_f
{
    my ( $fn, $buf ) = @_;
    my $fh;
    open ( $fh, '>', $fn ) or return ("ERROR cannot open '$fn'");
    if ( !print {$fh} $buf ) {
        close ( $fh );
        return ("ERROR cannot write to '$fn'");
    }
    # buffered write errors only surface when the handle is closed
    close ( $fh ) or return ("ERROR cannot write to '$fn'");
    return ( 'OK' );
}

# ap_f ( $fn, $buf )
#   Append $buf to file $fn.
#   Returns 'OK', or a string starting with "ERROR" on failure.
sub ap_f
{
    my ( $fn, $buf ) = @_;
    my $fh;
    open ( $fh, '>>', $fn ) or return ("ERROR cannot open '$fn'");
    if ( !print {$fh} $buf ) {
        close ( $fh );
        return ("ERROR cannot append to '$fn'");
    }
    # buffered write errors only surface when the handle is closed
    close ( $fh ) or return ("ERROR cannot append to '$fn'");
    return ( 'OK' );
}

#########################################################
# FTYPE functions

# type_file ( $filename, $how_hard, $punt )
#   Return a type string for $filename.
#   $how_hard selects the effort: "name"/"suffix" (suffix table only), "easy"
#   (binary signatures), "hard" (plus regex signatures) or anything else,
#   default "hardest" (plus typify_try_much_harder).
#   When $punt is true and content sniffing ends at 'binary/unknown' or
#   'text/plain', the suffix table gets the final word.
#   A file that cannot be read is typed by suffix alone.
sub type_file
{
    my ( $filename, $how_hard, $punt ) = @_;
    $how_hard = "hardest" unless ( defined ( $how_hard ) );
    $punt     = 0         unless ( defined ( $punt ) );

    if ( ( $how_hard eq "name" ) || ( $how_hard eq "suffix" ) ) {
        return ( typify_try_via_suffix ( $filename ) );
    }

    my ( $ok, $b_raw, $b_hex, $b_txt ) = qd ( $filename, 32 );
    my $ar = [ $ok, $b_raw, $b_hex, $b_txt, $filename ];
    print ( "$b_hex | $b_txt | " ) if ( $DEBUGGING > 0 );
    return ( typify_try_via_suffix($filename) ) if ( $ok ne 'OK' ); # zzapp -- FIXME: probably want some other behavior when !$punt

    my $type = typify_try ( $ar );
    if ( $how_hard ne "easy" ) {
        $type = typify_try_harder ( $ar ) if ( $type eq 'binary/unknown' ); # zzapp
        if ( $how_hard ne "hard" ) {
            $type = typify_try_much_harder ( $filename ) if ( $type eq 'binary/unknown' ); # zzapp
        }
    }

    my $inconclusive = ( ( $type eq 'binary/unknown' ) || ( $type eq 'text/plain' ) );
    $type = typify_try_via_suffix ( $filename ) if ( $punt && $inconclusive );
    return ( $type );
}

# typify_try_via_suffix ( $f )
#   Look the (case-insensitive) suffix of $f up in %SUFFIXES.
#   Returns 'binary/unknown' when there is no suffix or it is not listed.
sub typify_try_via_suffix
{
    my ( $f ) = @_;
    print ( "typify_try_via_suffix: f=[$f]\n") if ( $DEBUGGING > 4 );
    return ( 'binary/unknown' ) unless ( $f =~ /\.(\w+)$/ );
    my $suf = lc($1);
    print ( "typify_try_via_suffix: suf=[$suf]\n") if ( $DEBUGGING > 4 );
    return ( 'binary/unknown' ) unless ( defined ( $SUFFIXES{$suf} ) );
    return ( $SUFFIXES{$suf} );
}

# typify_try ( $ar )
#   $ar is the array reference built by type_file(); element 1 is the raw
#   leading bytes of the file.  Compare those bytes against the binary
#   signatures sharing their first byte and return the matching type, or
#   'binary/unknown'.
sub typify_try
{
    my ( $ar ) = @_;
    my $px = substr ( $ar->[1], 0, 1 );
    print ( "typify_try: b_hex=$ar->[2] px=$px\n") if ( $DEBUGGING > 4 );
    return ( 'binary/unknown' ) unless ( defined ( $PREFIXES{$px} ) );
    print ( "typify_try: matched p\n") if ( $DEBUGGING > 4 );
    foreach my $bink ( keys ( %{$PREFIXES{$px}} ) ) {
        my $binbuf = substr ( $ar->[1], 0, length($bink) );
        return ( $PREFIXES{$px}->{$bink} ) if ( $binbuf eq $bink );
    }
    return ( 'binary/unknown' );
}

# typify_try_harder ( $ar )
#   Try the regex signatures against the raw leading bytes ($ar->[1]), in
#   table order.  Failing those, a buffer without control or high-bit bytes
#   is 'text/plain'; anything else is 'binary/unknown'.
sub typify_try_harder
{
    my ( $ar ) = @_;
    foreach my $rex_ar ( @REXSIGS ) {
        my ( $type, $rex, $mod ) = @{$rex_ar};
        if ( $mod eq 'i' ) {
            return ( $type ) if ( $ar->[1] =~ /$rex/is );
        }
        else {
            return ( $type ) if ( $ar->[1] =~ /$rex/s );
        }
    }
    return ( "text/plain" ) unless ( $ar->[1] =~ /[\000-\010\013\016-\037\177-\377]/s );
    # zzapp -- implement .mov/.qt substring lookups
    return ( 'binary/unknown' );
}

# typify_try_much_harder ( $f )
#   Placeholder: always returns 'binary/unknown'.
sub typify_try_much_harder
{
    my ( $f ) = @_;
    # zzapp -- implement this you hoser -- grab a bigger chunk of the file and try typify_try() and typify_try_harder()
    return ( 'binary/unknown' );
}

# qd ( $f, $k )
#   "Quick dump": read up to $k bytes from the start of file $f.
#   Returns ( "OK", raw bytes, space-separated hex, dotted-printable text ),
#   or ( "ERROR", "cannot open file", "", "" ) when $f cannot be opened.
sub qd
{
    my ( $f, $k ) = @_;
    my $b_raw = '';
    my $fh;
    return ( "ERROR", "cannot open file", "", "" ) unless ( sysopen ( $fh, $f, O_RDONLY ) );
    sysread ( $fh, $b_raw, $k );
    close ( $fh );
    $b_raw = '' unless ( defined ( $b_raw ) );
    my $b_hex = join ( ' ', unpack ( '(H2)*', $b_raw ) );
    # printable ASCII (space through '~') is kept, everything else becomes '.'
    ( my $b_txt = $b_raw ) =~ tr/\x20-\x7e/./c;
    return ( "OK", $b_raw, $b_hex, $b_txt );
}

# END FTYPE
#########################################################

sub usage
{
    print ("dy [options] [pathlist]\n");
    print (" -maxdepth= Set cutoff depth for recursive descent\n");
    print (" -a Display size in bytes rather than KB\n");
    print (" -b Same as -a\n");
    print (" -ct[=] Use /usr/bin/file to describe file contents (limit description to chars)\n");
    print (" -ct-noz Tell /usr/bin/file to not decompress compressed data when determining type\n");
    print (" -dump[=] Show the first N bytes of the file in both dotted-printable and hex formats (default 16)\n");
    print (" -hash Emit records in hash format (path first, then tab-delimited