Perl Script to Parse PDF Invoice and Send Email

Written by James McDonald

November 7, 2016

This script uses perl to run pdftotext to grab the text from a PDF Invoice.

It parses the text to extract the document metadata, uses that metadata to construct a meaningful filename, and then attaches the PDF to an email and sends it to the correct recipient.

You could automate this by placing a watch on a folder and running this script on each new file that appears…

#!/usr/bin/perl
#
# @File process_invoice.pl
# @Author jmcd
# @Created 03/06/2015 1:33:28 PM
#

use strict;
use File::Copy;
use PDF::API2;
use File::Basename;
use Email::Address;
use MIME::Lite;
use warnings;

# ---------------------------------------------------------------------------
# Main script: extract the text of a PDF invoice with pdftotext, pull the
# invoice number, customer number and customer e-mail address out of it,
# copy the PDF into an "archive" directory next to the input under a
# descriptive name, stamp the copy's PDF properties, and mail it.
#
# Arguments:
#   $ARGV[0]  path of the PDF invoice (required)
#   $ARGV[1]  comma separated list of Cc addresses (optional)
#
# Dies with a descriptive message if pdftotext cannot be run, if no invoice
# or customer number can be found, or if archiving or sending fails.
# ---------------------------------------------------------------------------

my $ds = '/';    # path separator used when building the archive path

# NOTE(review): the sender below and the override recipient further down are
# the literal text '[email protected]', which looks like a placeholder left
# behind by e-mail obfuscation rather than a deliverable address -- confirm
# and replace both with real addresses before use.
my $from = 'James McDonald <[email protected]>';

my $input = $ARGV[0];
usage() unless defined $input && length $input;

# Declared unconditionally: the behaviour of "my ... if COND" is undefined.
my @cc = defined $ARGV[1] ? Email::Address->parse($ARGV[1]) : ();
my $cc_address = join(',', @cc);
print "$cc_address\n" if @cc;

my $pdftotext = '/usr/local/bin/pdftotext';
my $prefix    = "Tax Invoice";
my $suffix    = "JMITS";
my $file_ext  = ".pdf";

my $body = "Please find attached invoice from James McDonald IT Services for services rendered\r\n";
$body .= "Regards\r\n";
$body .= "James McDonald IT Services\r\n";

# get the path to the input file so we
# place the output file next to it
my $dirname = dirname($input);

# List-form pipe open: the file name is handed to pdftotext as a single
# argument without going through a shell, so quotes, spaces or shell
# metacharacters in the name cannot break or alter the command.
open(my $pdf_text, '-|', $pdftotext, $input, '-')
    or die "Cannot run $pdftotext on '$input': $!";

# A copy of the extracted text is kept in out.txt in the current directory.
open(my $txt_out, '>', 'out.txt')
    or die "Cannot open out.txt for writing: $!";

my $invoice;
my $customer;
my $parsed_cust_email;

while ( my $line = <$pdf_text> ) {

    # invoices starts with a 7 and is 5 digits long
    if ( $line =~ /^Tax\s+invoice\s+(7\d{4})\b/i ) {
        $invoice = $1;
    }

    # "Customer No:" or "Customer No." followed by the customer code
    if ( $line =~ /Customer No[:.] (\w+)/ ) {
        $customer = $1;
    }

    # The last line that yields a parsable address wins; a line that merely
    # contains '@' but does not parse must not wipe out an earlier match.
    if ( $line =~ /@/ ) {
        my ($address) = Email::Address->parse($line);
        $parsed_cust_email = $address if defined $address;
    }

    print {$txt_out} $line;
}

close($txt_out) or die "Cannot close out.txt: $!";

# close() on a pipe returns false when the child exited non-zero; in that
# case $! is 0 and the exit status is in $?.
close($pdf_text)
    or die( $!
        ? "Error reading from $pdftotext: $!"
        : "$pdftotext exited with status " . ( $? >> 8 ) . "\n" );

# Without both values the archive name and the subject would be meaningless,
# so stop before anything is copied or mailed.
defined $invoice  or die "No invoice number found in '$input'\n";
defined $customer or die "No customer number found in '$input'\n";

print "Invoice: $invoice\n";
print "Customer: $customer\n";
my $subject = "Tax Invoice $invoice from James McDonald IT Services";
if ( defined $parsed_cust_email && $parsed_cust_email ne '' ) {
    print $parsed_cust_email . "\n";
}

# override default because I don't want to send straight away
my $cust_email = '[email protected]';

my $filename = "$prefix $invoice for $customer from $suffix";
my $fullname = $dirname . $ds . 'archive' . $ds . $filename . $file_ext;

print $fullname . "\n";
copy( $input, $fullname )
    or die "Cannot copy '$input' to '$fullname': $!";

print "Email is being sent to: " . $cust_email
    . ( @cc ? ' and ' . $cc_address : '' ) . "\n";

# populate the PDF properties of the archived copy with relevant info
my $pdf = PDF::API2->open($fullname);
$pdf->info(
    'Title'   => $filename,
    'Subject' => $subject,
);
$pdf->update();

### Create a new multipart message; Cc is only set when addresses were given
my %headers = (
    From    => $from,
    To      => $cust_email,
    Subject => $subject,
    Type    => 'multipart/mixed',
);
$headers{Cc} = $cc_address if @cc;
my $msg = MIME::Lite->new(%headers);

### Add parts (each "attach" has same arguments as "new"):
$msg->attach(
    Type => 'TEXT',
    Data => $body,
);
$msg->attach(
    Type        => 'application/pdf',
    Path        => $fullname,
    Filename    => $filename . $file_ext,
    Disposition => 'attachment',
);

### send with sendmail
$msg->send()
    or die "Failed to send '$subject' to $cust_email\n";
# Print command-line help to STDERR and terminate the script.
#
# Takes no arguments and never returns. The exit status is 1 (failure)
# because the help is shown when the required input argument is missing,
# so a calling script or folder watcher can tell that nothing was processed.
sub usage {
    print STDERR "Usage:\n";
    print STDERR "arg 1 = input PDF\narg 2 = comma separated cc email list\n";
    print STDERR "process_invoice.pl input.pdf name1\@example.com,name2\@otherexample.com\n";
    exit 1;
}
# Return a copy of the argument with all leading and trailing whitespace
# removed; whitespace inside the string is left alone.
sub trim {
    my ($text) = @_;
    $text =~ s/^\s+|\s+$//g;
    return $text;
}
# Return a copy of the given string with every space replaced by an
# underscore.
#
# Declared without a prototype: an empty "()" prototype tells Perl the sub
# takes no arguments, which makes any call that passes the string a
# compile-time error and contradicts the argument read below.
sub underscores {
    my ($text) = @_;
    $text =~ tr/ /_/;
    return $text;
}

a

0 Comments

Trackbacks/Pingbacks

  1. xTuple Postbooks Edition 4.10.0 RC FTW | The Southern IT Observer - […] What I’ve found missing has been email from the Postbooks Edition application. But because the “connect” module brings a…

Submit a Comment

Your email address will not be published. Required fields are marked *

This site is protected by reCAPTCHA and the Google Privacy Policy and Terms of Service apply.

The reCAPTCHA verification period has expired. Please reload the page.

You May Also Like…