#!/usr/bin/perl
# pluckbook: generates a Plucker compatible pdb file given a html
# file. Handles compressed html files.
# Version 2.0
# Author: elf@ee.ryerson.ca, Jun 6, 2002
# $Id: pb,v 1.1 2002/09/21 15:32:34 elf Exp $
#
# Usage: pluckbook htmlfile depth pdbfile
#   htmlfile  input document; may be compressed (.gz, .Z, .zip)
#   depth     handed to plucker-build as --maxdepth
#   pdbfile   handed to plucker-build as -f
#
# Flow: decompress the input if needed, pick a document title from the
# file contents (falling back to the file name), run plucker-build, then
# put the input back into compressed form.

use strict;
use warnings;

my $PLUCKDIR = "$ENV{HOME}/palm/plucker";    # this is where plucked files go

# location of the plucker program (no trailing space: it is passed to the
# list form of system(), not pasted into a shell command line)
my $PLUCKER = "/usr/local/plucker-1.1.12/bin/plucker-build";

my $archtype = 0;    # 0=uncompressed, 1=gzip, 2=zip
my $DOC_TITLE;

if (@ARGV < 3) {
    print("Usage: pluckbook htmlfile depth pdbfile\n\"htmlfile\" may be compressed (.gz, .Z, .zip).\n");
    exit(0);
}

my ($input, $depth, $pdbfile) = @ARGV;

# If the file is compressed, uncompress it before plucking.  The suffix
# tests are anchored so that only a real trailing extension counts, and the
# list form of system() keeps odd characters in the name away from the shell.
if ($input =~ /\.(?:gz|Z)\z/) {
    print("Uncompressing $input with gunzip...\n");
    $archtype = 1;
    system('gunzip', $input) == 0
        or die "gunzip failed on $input\n";
} elsif ($input =~ /\.zip\z/) {
    print("Uncompressing $input with unzip...\n");
    $archtype = 2;
    system('unzip', $input) == 0
        or die "unzip failed on $input\n";
}

# Name of the uncompressed document: the input minus a trailing
# .gz, .Z or .zip extension.
(my $html_file = $input) =~ s/\.(?:gz|Z|zip)\z//;

open(my $fh, '<', $html_file)
    or die "Could not open $html_file for reading: $!\n";

# Find the <title> tag in the file and extract the title.  Or if it's
# a plain text file, look for "Title:".  The first line matching either
# form decides the title and ends the scan.
while (my $line = <$fh>) {
    $line =~ s/\r?\n\z//;    # drop the line ending only (LF or CRLF)

    if ($line =~ m{<title\b[^>]*>\s*(.*?)\s*(?:</title>|\z)}i) {
        # Keep only the text between the tags, not the markup around them.
        $DOC_TITLE = $1;
        last;
    } elsif ($line =~ /[Tt]itle[ :]/) {
        ($DOC_TITLE = $line) =~ s/[Tt]itle[ :]//g;
        $DOC_TITLE =~ s/\A\s+//;
        $DOC_TITLE =~ s/\s+\z//;
        last;
    }
}
close($fh);

# If title cannot be extracted, use the filename without the extension
unless (defined $DOC_TITLE && length $DOC_TITLE) {
    print "Could not get title. ";
    ($DOC_TITLE = $html_file) =~ s/\.html\z//;
}

print "Plucking with document title: $DOC_TITLE\n";

# Each argument is passed as its own list element, so titles and file
# names containing spaces or quotes reach plucker-build intact.
my $plucker_status = system(
    $PLUCKER,
    '-q',
    '--stayonhost',
    "--pluckerdir=$PLUCKDIR",
    "--home-url=$html_file",
    '-f', $pdbfile,
    '-N', $DOC_TITLE,
    '--category', 'Books',
    "--maxdepth=$depth",
);
warn "plucker-build failed (wait status $plucker_status)\n"
    if $plucker_status != 0;

# Put the input back into compressed form, even if plucking failed.
if ($archtype == 1) {
    print "Re-compressing $html_file\n";
    # gzip writes "$html_file.gz", so an input that arrived as .Z comes
    # back with a .gz suffix.
    system('gzip', $html_file) == 0
        or warn "gzip failed on $html_file\n";
} elsif ($archtype == 2) {
    print "Re-compressing $html_file\n";
    # zip needs both the archive and the file to add; -m removes the
    # extracted copy once it is stored in the original archive.
    system('zip', '-m', $input, $html_file) == 0
        or warn "zip failed on $html_file\n";
}

exit($plucker_status == 0 ? 0 : 1);