This script uses Perl to run pdftotext and extract the text from a PDF invoice.
It parses that text to extract the document metadata, then uses the metadata to construct a meaningful filename, attach the PDF to an email, and send it to the correct recipient.
You could automate this by placing a watch on a folder and running this script on each new file that appears...
#!/usr/bin/perl
#
# @File process_invoice.pl
# @Author jmcd
# @Created 03/06/2015 1:33:28 PM
#
# Extracts the text of a PDF invoice with pdftotext, pulls the invoice
# number, customer number and customer email address out of that text,
# copies the PDF into an "archive" directory next to the input file under a
# descriptive name, stamps Title/Subject metadata into the copy, and mails
# the copy as an attachment.
#
# Usage:
#   process_invoice.pl input.pdf [cc1@example.com,cc2@example.com]
#
# Side effects:
#   * writes the extracted text to "out.txt" in the current directory
#   * writes "<dir of input>/archive/Tax Invoice <no> for <cust> from JMITS.pdf"
#   * sends one email via MIME::Lite's default send method
#
use strict;
use warnings;

use File::Copy;
use PDF::API2;
use File::Basename;
use Email::Address;
use MIME::Lite;

my $ds = '/';

# NOTE(review): '[email protected]' looks like a web-scrape redaction
# placeholder rather than a deliverable address -- replace before use.
my $from = 'James McDonald <[email protected]>';

my $input = $ARGV[0];
usage() unless $input;

# Optional second argument: comma separated list of Cc addresses.
# (Deliberately not written as "my @cc = ... if ...", whose behaviour is
# undefined according to perlsyn.)
my @cc = defined $ARGV[1] ? Email::Address->parse( $ARGV[1] ) : ();
my $cc_address = join( ',', @cc );
print "$cc_address\n";

my $pdftotext    = '/usr/local/bin/pdftotext';
my $prefix       = "Tax Invoice";
my $invoice_root = '/home/jm/dropbox/jmits/invoices';    # currently unused
my $suffix       = "JMITS";

my $body = "Please find attached invoice from James McDonald IT Services for services rendered\r\n";
$body .= "Regards\r\n";
$body .= "James McDonald IT Services\r\n";

# get the path to the input file so we
# place the output file next to it
my $dirname = dirname($input);

my $invoice;
my $customer;
my $cust_email;

# List-form pipe open: the file name is handed to pdftotext as a single
# argument and never passes through a shell, so quotes or shell
# metacharacters in the name can neither break nor inject commands.
open( my $pdf_text, '-|', $pdftotext, $input, '-' )
    or die "Cannot run $pdftotext on '$input': $!";
open( my $txt_out, '>', 'out.txt' )
    or die "Cannot open out.txt for writing: $!";

while ( my $line = <$pdf_text> ) {

    # invoices starts with a 7 and is 5 digits long
    if ( $line =~ /^Tax\s+invoice\s+(7\d{4})/i ) {
        $invoice = $1;
    }

    # "Customer No:" or "Customer No." followed by the customer code
    if ( $line =~ /Customer No[:.] (\w+)/ ) {
        $customer = $1;
    }

    # The last line holding a parsable address wins; a line that merely
    # contains '@' no longer resets an address that was already found.
    if ( $line =~ /@/ ) {
        my @address = Email::Address->parse($line);
        $cust_email = $address[0] if @address;
    }

    print {$txt_out} $line;
}

close($txt_out) or die "Cannot close out.txt: $!";

# Closing a pipe also reaps the child, so a failed pdftotext run surfaces here.
close($pdf_text)
    or die(
      $!
    ? "Error closing pipe from $pdftotext: $!"
    : "$pdftotext exited with status " . ( $? >> 8 )
    );

# Without both values the archive name and mail subject would be garbage.
defined $invoice
    or die "No invoice number (\"Tax invoice 7NNNN\") found in '$input'\n";
defined $customer
    or die "No customer number (\"Customer No: XXX\") found in '$input'\n";

print "Invoice: $invoice\n";
print "Customer: $customer\n";

my $subject = "Tax Invoice $invoice from James McDonald IT Services";

if ( defined $cust_email && $cust_email ne '' ) {
    print $cust_email . "\n";
}

# Remember the address parsed from the invoice (not used further below).
my $cust_email2 = $cust_email;

# override default because I don't want to send straight away
# NOTE(review): same redaction placeholder as $from -- replace before use.
$cust_email = '[email protected]';

my $filename = "$prefix $invoice for $customer from $suffix";
my $file_ext = ".pdf";
my $fullname = $dirname . $ds . 'archive' . $ds . $filename . $file_ext;

print $fullname . "\n";

copy( $input, $fullname )
    or die "Cannot copy '$input' to '$fullname': $!";

print "Email is being sent to: " . $cust_email . ' and ' . $cc_address . "\n";

# populate the PDF properties with relevant info
my $pdf = PDF::API2->open($fullname);
$pdf->info(
    'Title'   => $filename,
    'Subject' => $subject,
);
$pdf->update();

### Create a new multipart message:
my $msg = MIME::Lite->new(
    From => $from,
    To   => $cust_email,

    # only emit a Cc header when cc addresses were actually supplied
    ( length($cc_address) ? ( Cc => $cc_address ) : () ),
    Subject => $subject,
    Type    => 'multipart/mixed',
);

### Add parts (each "attach" has same arguments as "new"):
$msg->attach(
    Type => 'TEXT',
    Data => $body,
);

$msg->attach(
    Type        => 'application/pdf',
    Path        => $fullname,
    Filename    => $filename . $file_ext,
    Disposition => 'attachment',
);

### send with sendmail
$msg->send()
    or die "Failed to send invoice $invoice to $cust_email\n";

# Print the command line help and terminate the script.
sub usage {
    print "Usage:\n";
    print "arg 1 = input PDF\narg 2 = comma separated cc email list\n";
    print "process_invoice.pl input.pdf name1\@example.com,name2\@otherexample.com\n";
    exit;
}

# Return a copy of the argument with leading and trailing whitespace removed.
sub trim {
    my $string = shift;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}

# Return a copy of the argument with every space replaced by an underscore.
# (Declared without the former empty prototype, which made any normal call
# that passes an argument a compile-time error.)
sub underscores {
    my $us = shift;
    $us =~ s/ /_/g;
    return $us;
}
a
0 Comments
Trackbacks/Pingbacks