#!/usr/bin/perl -w # # $Id$ # # Purpose: convert a gnucash XML file to a serie of SQL requests populating a # SQL-Ledger database. # # Usage: call with -h or see HELP_MESSAGE below # # Current status: we can parse most of the gnucash file and properly import all # transactions. We are currently having problems with payments on multiple # bills and taxes. # # Those problems and others are noted with the XXX sign in the code. # # The bugs of the script were moved to http://bugs.koumbit.net/ # use XML::Simple qw(:strict); use Data::Dumper; use Getopt::Std; $Getopt::Std::STANDARD_HELP_VERSION = 1; $Getopt::Std::STANDARD_HELP_VERSION = 1; # silence a warning # this is just the regular usage sub HELP_MESSAGE() { print < -h: this help -y: always use the default answer, do not prompt -d : print parse tree to , overwriting it -c : run interactively, merge with the COA structure defined in -m : output the mapping between gnucash and SL accounts in a human readble form to , overwriting it This will attempt a conversion of the GnuCash file in a serie of Postgresql requests on standard output. Using '-' as a filename to parse from stdin will cause obvious problems with the -c switch and is therefore not permitted. -c will also add the given file to the SQL output. Examples: Just import the whole thing normally. This will overwrite everything in the database. % ./gnc20.perl foo.gnucash > import.sql Try to merge with the Belgium COA. Note that this will print on stdout. % ./gnc20.perl -c /usr/lib/sql-ledger/sql/Belgium-chart.sql foo.gnucash EOF ; } # figure out a "posted" date by the posted and entered field # # defaults to dateposted (arg1) or dateentered (arg2) if dateposted is invalid # or now if both are unavailable sub finddateposted { my ($dateposted, $dateentered) = @_; $dateposted =~ s/ .*$//g if ($dateposted); unless ($dateposted) { $dateposted = $dateentered; $dateposted =~ s/ .*$//g if ($dateposted); } # if we still don't have a date assign one unless ($dateposted) { warn("falling back to now\n"); @d = localtime(time); $d[4]++; $d[5] += 1900; $dateposted = sprintf("%.2d-%.2d-%.2d", $d[5], $d[4], $d[3]); } return $dateposted; } # small arithmetics: compute a X/Y expression sub compute_amount { my ($amount,$divisor) = split(/\//, shift(@_)); return ($amount / $divisor); } # parse options my %opts = (); if (!getopts('hyc:d:m:', \%opts) || $opts{'h'} || $#ARGV < 0 || ($opts{'c'} && ($ARGV[0] eq '-'))) { HELP_MESSAGE(); exit(); } # # Mapping between gnucash accounts account types and SL # %acctype = ( BANK => { 'type' => 'asset', link => 'AR_paid:AP_paid' }, ASSET => { type => 'asset', link => 'AR_paid:AP_paid' }, CREDIT => { type => 'liability', link => 'AR_paid:AP_paid' }, EQUITY => { type => 'equity', link => '' }, EXPENSE => { type => 'expense', link => 'AP_amount:IC_cogs:IC_expense' }, INCOME => { type => 'income', link => 'AR_amount:IC_sale:IC_income' }, MUTUAL => { type => 'asset', link => 'AR_paid:AP_paid' }, STOCK => { type => 'asset', link => 'AR_paid:AP_paid' }, # Anarcat, 10/03/2005, necessary for a 2.0.0 book RECEIVABLE => { type => 'asset', link => 'AR' }, LIABILITY => { type => 'liability', link => 'AP_paid'}, PAYABLE => { type => 'liability', link => 'AP' }, ); # # Charter of accounts mappings # %chart = ( asset => { accno => 1000, category => 'A' }, liability => { accno => 2000, category => 'L' }, equity => { accno => 3000, category => 'Q' }, income => { accno => 4000, category => 'I' }, expense => { accno => 5000, category => 'E' } ); # a part of the reverse of the above %chart_short = ( 'A' => 'asset', 'L' => 'liability', 'Q' => 'equity', 'I' => 'income', 'E' => 'expense', 'C' => 'asset', # XXX: not sure about this. Example: Accum. Amort. -Vehicle ); # the account uuid => accno mapping %accid = (); # the accid cache $accdump = "accid.dump"; warn("parsing Gnucash file $ARGV[0], please be patient\n"); $ref = XMLin($ARGV[0], ForceArray => 0, KeyAttr => []); if ($opts{'d'}) { open(DEBUG, "> $opts{'d'}") || warn("can't open debug outfile"); #print DEBUG Dumper($ref); } # first empty the tables we're going to fill print qq|DELETE FROM chart; DELETE FROM acc_trans; DELETE FROM gl; DELETE FROM customer; DELETE FROM vendor; DELETE FROM customertax; DELETE FROM vendortax; DELETE FROM ap; DELETE FROM ar; DELETE FROM invoice; DELETE FROM parts; DELETE FROM partstax; DELETE FROM tax; |; # # "main": this is where everything is called # # # parse the file's options # $options = parse_options($ref); # # parse accounts and keep track of guid => id # my $slotid = parse_accounts($ref, $opts{'c'}); # # taxes is taxtable guid => rate # my $taxes = parse_taxes($ref, \%accid); # # parse Customer/vendor and keep track of guid => id mappings # my $tierid = parse_tiers($ref, $taxes); # # parse invoices and keep track of slot and invoice id mappings # my ($lotid, $invid) = parse_invoices($ref, $tierid); # # fill the invoice table # parse_entries($ref, $invid, $taxes, $options, \%accid); # # finally, parse transactions # parse_trans($ref, \%accid, $lotid, $invid, $slotid); # # cleanup display, close files and save the accid map # sub cleanup { print STDERR "\n"; dumpaccid(); close(DEBUG) if $opts{'d'}; exit(); } # # this dumps the accid map for other sessions # sub dumpaccid { $d = Data::Dumper->new([\%accid], ['accid']); open (DMP, "> $accdump") || warn ("cannot dump the current accid map to $accdump: $!\n"); print DMP $d->Dump; close(DMP); print STDERR "saved dump to $accdump\n"; } # # ACCOUNTS CONVERSION # sub parse_accounts { my $ref = shift(@_); my $charter = shift(@_); my %slotid = (); my %chart_str; if (defined($charter)) { open(CHART, "< $charter") || die("cannot open charter file $charter\n"); while () { # the possible account categories $category = join('', keys(%chart_short)); # find the accno ($1), the account name ($2) and its category ($3) if (/^INSERT\s*INTO\s*chart\s*(?:\(accno,\s*description,\s*charttype,\s*category,\s*.*\)\s*)?VALUES\s*\('(\d+)',\s*'((?:(?:'')|[^'])+)',\s*'.*',\s*'([$category])'/) { $chart_str{$1} = [$2, $3]; # keep track of the max accno for a given acctype $type = $chart_short{$3}; if (!defined($chart{$type}->{accno}) || $1 > $chart{$type}->{accno}) { $chart{$type}->{accno} = $1; } print; # we merge, so we print the selected COA along with our own } elsif (/INSERT.*INTO.*chart/) { warn "skipping possibly valuable statement: $_"; } } close(CHART); } { local(*INPUT, $/); if (open (INPUT, $accdump)) { $var = ; $tmp = eval $var; %accid = %{$tmp}; } else { warn "cannot open $accdump: $!, starting with clean state\n"; } } if (ref($ref->{"gnc:book"}->{"gnc:account"}) ne "ARRAY") { $ref->{"gnc:book"}->{"gnc:account"} = [ $ref->{"gnc:book"}->{"gnc:account"} ]; } # cache @sorted = sort keys %chart_str; $SIG{'INT'} = \&cleanup; if ($opts{'y'}) { open(OLDERR, ">&STDERR"); close(STDERR); open(STDERR, ">/dev/null"); } foreach $acc (@{ $ref->{"gnc:book"}->{"gnc:account"} }) { $accno = undef; $acc->{"act:name"} =~ s/'/''/g; #' $mapping_exists = defined($accid{$acc->{"act:id"}{content}}); $addnew = !defined($charter); # $addnew is a cache if (!$addnew) { # ie if we're interactive $_ = undef; LOOP: { do { if (defined($_)) { chomp; if (/^\?$/) { if ($mapping_exists) { print STDERR <{"act:id"}{content}}; last; } elsif (($mapping_exists && /^n$/i) || (!$mapping_exists && /^n?$/i)) { $addnew = 1; last; } elsif (/^l$/i) { $all = /^L$/; foreach (@sorted) { #print DEBUG "_: $_, $chart_str{$_}, str: " . Dumper($chart_str{$_}). ", 1: $chart_str{$_}[1], short: $chart_short{$chart_str{$_}[1]}, type: $acctype{$acc->{'act:type'}}{'type'}\n" if $opts{'d'}; if (defined($_) && defined($chart_str{$_}) && @{$chart_str{$_}} && ($all || $chart_short{$chart_str{$_}[1]} eq $acctype{$acc->{"act:type"}}{'type'})) { print STDERR "$_: ".$chart_str{$_}[0].", " . $chart_str{$_}[1] ."\n"; } elsif (defined($_) && ($all || $chart_short{$chart_str{$_}[1]} eq $acctype{$acc->{"act:type"}}{'type'})) { print STDERR "$_: \n"; } elsif (!defined($_)) { print STDERR "undef entry\n"; } } } elsif (/^s$/) { $addnew = 0; last; } elsif (/^\d+$/) { $init = 1; $addnew = 0; chomp; $accno = $_; if (!exists $chart_str{$accno}) { $init = 0; $addnew = 0; warn "$_ is not a known account id\n"; INNER: { $_ = '' if ($opts{'y'}); do { if (/^y?$/i) { $addnew = $init = 1; last; } elsif (/^n$/i) { $init = 0; last; } print STDERR "create? (Y/n): "; } while ($opts{'y'} || ($_ = )); }; } if ($init) { $accid{$acc->{"act:id"}{content}} = $accno; last; } } } print STDERR "account: $acc->{'act:name'}, $acctype{$acc->{'act:type'}}{'type'}\n"; if ($mapping_exists) { print STDERR "I know about this mapping: $accid{$acc->{'act:id'}{content}}"; if (defined($chart_str{$accid{$acc->{'act:id'}{content}}}[0])) { print STDERR ", $chart_str{$accid{$acc->{'act:id'}{content}}}[0]"; } else { print STDERR " (not a known SL account)"; $addnew = 1; } print STDERR "\n"; print STDERR "what should I do with it? (A/n/s/l/L/.../?): "; } else { print STDERR "what should I do with it? (N/s/l/L/.../?): "; } # do not prompt, ie. "enter" all the time $_ = '' if ($opts{'y'}); } while ($opts{'y'} || ($_ = )); } } if ($addnew) { if (!defined($accno)) { $accno = ++$chart{$acctype{$acc->{"act:type"}}{'type'}}->{accno}; } print qq|INSERT INTO chart (accno,description,category,link) VALUES ('$accno','$acc->{"act:name"}','$chart{$acctype{$acc->{"act:type"}}{'type'}}->{category}','$acctype{$acc->{"act:type"}}{link}');\n|; # save id $accid{$acc->{"act:id"}{content}} = $accno; # add this to our known chart strings $chart_str{$accno} = [ $acc->{'act:name'}, $chart{$acctype{$acc->{'act:type'}}{'type'}}{'category'} ]; # refresh the cache otherwise the listing won't work @sorted = sort keys %chart_str; $accno = undef; } } if ($opts{'y'}) { close(STDERR); open(STDERR, ">&OLDERR"); close(OLDERR); } dumpaccid(); if ($opts{'m'}) { print STDERR "printing to $opts{'m'}\n"; open(MAPPINGS, "> $opts{'m'}") || warn("can't open mapping outfile"); foreach $acc (@{ $ref->{"gnc:book"}->{"gnc:account"} }) { print MAPPINGS $acc->{'act:name'} . " -> " . $accid{$acc->{'act:id'}{content}}." " . $chart_str{$accid{$acc->{'act:id'}{content}}}[0] . "\n"; } close(MAPPINGS); } # look for "slots" in account configurations # slots are generic mappings between elements. we're looking for invoice mappings because some transactions are mapped to slots instead of invoices foreach $acc (@{ $ref->{"gnc:book"}->{"gnc:account"} }) { print DEBUG Dumper($acc) if $opts{'d'}; if (defined($acc->{"act:lots"}) && ref($acc->{"act:lots"}) eq "HASH" && ref($acc->{"act:lots"}{"gnc:lot"}) eq "ARRAY") { foreach $lot (@{ $acc->{"act:lots"}{"gnc:lot"} }) { $slots = $lot->{"lot:slots"}{"slot"}; if (ref($slots) ne "ARRAY") { $slots = [ $slots ]; } foreach $slot (@{ $slots }) { if ($slot->{"slot:key"} eq "gncInvoice") { if (ref($slot->{"slot:value"}) ne "ARRAY") { $slot->{"slot:value"} = [ $slot->{"slot:value"} ]; } foreach $s (@{ $slot->{"slot:value"} } ) { if ($s->{"type"} eq "frame") { $slotid{$lot->{"lot:id"}{"content"}} = $s->{"slot"}{"slot:value"}{"content"}; } else { warn("unexpected: not a frame found as lot:slots"); } } } else { # XXX: we've seen "gncOwner", which I don't exactly understand what it's for #warn("skipping slot ".$slot->{"slot:key"}); } } } } } print DEBUG "slotid: " . Dumper(%slotid) if $opts{'d'}; return \%slotid; } # # TRANSACTION CONVERSION # # In the acc_trans table, for each invoice, there has to be an entry # of the type: # # trans_id chart_id amount # X (ar) -Y # X (sales) Y # X (ar) Z # X (cheque) -Z # # Where: # X: the bill id (in ar) # Y: the bill amount # Z: a payment on the bill. Those two transactions are repeated for each payment # # trans_id always refers to ar or ap sub parse_trans { my $ref = shift(@_); my $accid = shift(@_); my $lotid = shift(@_); my $invid = shift(@_); my $slotid = shift(@_); my %invid_table = (); my $table; while ((undef, $value) = each %{$invid}) { $invid_table{$value->[0]} = $value->[1]; } if (ref($ref->{"gnc:book"}->{"gnc:transaction"}) ne "ARRAY") { $ref->{"gnc:book"}->{"gnc:transaction"} = [ $ref->{"gnc:book"}->{"gnc:transaction"} ]; } foreach $trn (@{ $ref->{"gnc:book"}->{"gnc:transaction"} }) { $dateposted = finddateposted($trn->{"trn:date-posted"}->{"ts:date"}, $trn->{"trn:date-entered"}->{"ts:date"}); $notes = ""; if (ref $trn->{"trn:slots"}{"slot"} ne "ARRAY") { $slots = [ $trn->{"trn:slots"}{"slot"} ]; } else { $slots = $trn->{"trn:slots"}{"slot"}; } $table = 'gl'; # default table is gl $invoice = undef; # look if we're in a "invoice-related transaction" foreach $slot (@{ $slots }) { if (defined($slot)) { # we discard slot:key : 'trans-txn-type' = P, it seems irrelevant if ($slot->{"slot:key"} eq 'notes') { $notes = $slot{"slot:value"}{content} || ""; $notes =~ s/'/''/g; # ' } elsif ($slot->{"slot:key"} eq 'gncInvoice') { $_id = $slot->{"slot:value"}{'slot'}{'slot:value'}{'content'}; # change the destination table $table = $invid->{$_id}[1]; $invoice = $invid->{$_id}[0]; } } } $description = $trn->{"trn:description"}; $description =~ s/'/''/g; # ' $description = "N/A" if (ref $description eq 'HASH'); # the only thing that distinguishes a gl entry from a payment or a bill entry is the split "action" and "lot" entries # look for them # # this actually sets defaults for the whole transaction that can be overriden in the splits below if (ref $trn->{"trn:splits"}{"trn:split"} eq 'ARRAY') { foreach $split (@{ $trn->{"trn:splits"}{"trn:split"} }) { # XXX: we assume that if there's a :lot entry, there's an :action entry if (defined($split->{'split:lot'})) { # this is a payment or a bill entry, reset the table so that we seek it below $invoice = $lotid->{$split->{'split:lot'}{'content'}}; if (!defined($invoice)) { if (!defined($slotid->{$split->{'split:lot'}{'content'}}) || !defined($lotid->{$slotid->{$split->{'split:lot'}{'content'}}})) { warn("no invoice reference found in transaction ".$trn->{"trn:id"}{"content"}.", trying to import as a gl entry\n"); $table = 'gl'; } else { $invoice = $lotid->{$slotid->{$split->{'split:lot'}{'content'}}}; $table = $invid_table{$invoice}; } } else { $table = $invid_table{$invoice}; } } } } # XXX? note that we drop all this information if we have a ar/ap transaction if (defined($table) && $table eq 'gl') { print qq|INSERT INTO gl (reference,description,transdate,notes) VALUES ('$trn->{"trn:id"}{content}','$description','$dateposted','$notes');\n|; } # add acc_trans if (ref $trn->{"trn:splits"}{"trn:split"} eq 'ARRAY') { foreach $split (@{ $trn->{"trn:splits"}{"trn:split"} }) { if (defined($table) && $table eq 'gl') { $invoice = qq|(SELECT id FROM gl WHERE reference = '$trn->{"trn:id"}{content}')|; } # XXX: we assume that if there's a :lot entry, there's an :action entry if (defined($split->{'split:lot'})) { # this is a payment or a bill entry, reset the table so that we seek it below $i = $lotid->{$split->{'split:lot'}{'content'}}; if (defined($i)) { $invoice = $i; $table = $invid_table{$invoice}; } } if (!defined($trn->{"trn:num"})) { $trn->{"trn:num"} = ''; } $trn->{"trn:num"} =~ s/'/''/g; #' $reconciled = ($split->{"split:reconciled-state"} eq 'y') ? '1' : '0'; $amount = compute_amount($split->{"split:value"}) * -1; $source = qq|'$trn->{"trn:num"}'|; $chart_id = qq|(SELECT id FROM chart WHERE accno = '$accid->{$split->{"split:account"}{content}}')|; print qq|INSERT INTO acc_trans (trans_id,chart_id,amount,transdate,source,cleared) VALUES ($invoice, $chart_id,$amount,'$dateposted',$source,'$reconciled');\n| } } # XXX: what does this mean? I haven't seen such an entry yet. if (ref $trn->{"trn:splits"}{"trn:split"} eq 'HASH') { warn("I don't know what this is. Please contact anarcat\@anarcat.ath.cx with details."); ($amount,$divisor) = split(/\//, $trn->{"trn:splits"}{"trn:split"}{"split:value"}); $amount = $amount / $divisor * -1; if ($amount == 0) { $amount = 0.01; } #if ($split->{'split:lot'}) { #print STDERR "split:lot found in hash, invoice: ", $lotid->{$split->{'split:lot'}{'content'}} . "\n"; #} $reconciled = ($trn->{"trn:splits"}{"trn:split"}{"split:reconciled-state"} eq 'y') ? '1' : '0'; for (1 .. 2) { print qq|INSERT INTO acc_trans (trans_id,chart_id,amount,transdate,source,cleared) VALUES ((SELECT id FROM gl WHERE reference = '$trn->{"trn:id"}{content}'), (SELECT id FROM chart WHERE accno = '$accid->{$trn->{"trn:splits"}{"trn:split"}{"split:account"}{content}}'),$amount,'$dateposted','$trn->{"trn:num"}','$reconciled');\n|; $amount *= -1; } } $reference = $trn->{"trn:num"}; $reference = $trn->{"trn:description"} unless $reference; $reference = $trn->{"trn:id"}{content} unless $reference; $reference =~ s/'/''/g; #' $reference = "N/A" if (ref $reference eq 'HASH'); if ($table eq 'gl') { print qq|UPDATE gl SET reference = '$reference' WHERE reference = '$trn->{"trn:id"}{content}';\n|; } } print qq|UPDATE ar SET paid = (SELECT SUM(amount) FROM acc_trans WHERE amount > 0 AND trans_id = ar.id AND chart_id = (SELECT id FROM chart WHERE link = 'AR' )), datepaid = (SELECT MAX(transdate) FROM acc_trans WHERE amount > 0 AND trans_id = ar.id AND chart_id = (SELECT id FROM chart WHERE link = 'AR' ));\n|; print qq|UPDATE ap SET paid = -1 * (SELECT SUM(amount) FROM acc_trans WHERE amount < 0 AND trans_id = ap.id AND chart_id = (SELECT id FROM chart WHERE link = 'AP')), datepaid = (SELECT MAX(transdate) FROM acc_trans WHERE amount < 0 AND trans_id = ap.id AND chart_id = (SELECT id FROM chart WHERE link = 'AP' ));\n|; print qq|UPDATE ar SET paid = 0 WHERE paid IS NULL;\n|; print qq|UPDATE ap SET paid = 0 WHERE paid IS NULL;\n|; } # # CUSTOMER/VENDOR CONVERSION # # Vendor and customer structures are *very* similar, if not identical # in both programs, so we use a bit of perl reference magic to make a # single loop. # # Not parsed: # # 'version' => '2.0.0', # 'cust:shipaddr' => { 'version' => '2.0.0' }, # 'cust:use-tt' => '0', # 'cust:credit' => '0/1', # 'cust:discount' => '0/1', # 'cust:active' => '1', # 'cust:guid' => { # 'content' => '3b1722e60683b432ddff73b0fdab8ef1', # 'type' => 'guid' # } # # we make the following assumptions on the addr structure: # - the first line is the civic address # - the second line is the city # - the third line is the zip code # sub parse_tiers { my $ref = shift(@_); my %tierid = (); my %vendor = ( 'node' => 'gnc:GncVendor', 'short' => 'vendor', 'table' => 'vendor', ); my %customer = ( 'node' => 'gnc:GncCustomer', 'short' => 'cust', 'table' => 'customer', ); for $type (\%customer, \%vendor) { if (ref($ref->{"gnc:book"}->{$type->{'node'}}) ne "ARRAY") { $ref->{"gnc:book"}->{$type->{'node'}} = [$ref->{"gnc:book"}->{$type->{'node'}}]; } foreach $entry (@{ $ref->{"gnc:book"}->{$type->{'node'}} }) { # we assume that we are in the ISO4217 currency space if (!$entry->{$type->{'short'}.":currency"}->{"cmdty:space"} eq "ISO4217") { warn("unknown currency space: ". $entry->{$type->{'short'}.":currency"}->{"cmdty:space"}. "\n"); } $id = int($entry->{$type->{'short'}.":id"}); if ($entry->{$type->{'short'}.":taxincluded"} ne "YES") { $taxincluded = "FALSE"; } else { $taxincluded = "TRUE"; } # silence warnings on missing stuff here $addr1 = $entry->{$type->{'short'}.":addr"}->{"addr:addr1"} || ""; $city = $entry->{$type->{'short'}.":addr"}->{"addr:addr2"} || ""; $zipcode = $entry->{$type->{'short'}.":addr"}->{"addr:addr3"} || ""; $contact = $entry->{$type->{'short'}.":addr"}->{"addr:name"} || ""; $phone = $entry->{$type->{'short'}.":addr"}->{"addr:phone"} || ""; $fax = $entry->{$type->{'short'}.":addr"}->{"addr:fax"} || ""; $email = $entry->{$type->{'short'}.":addr"}->{"addr:email"} || ""; $addr1 =~ s/'/''/g; $name = $entry->{$type->{'short'}.":name"}; $name = substr($name, 0, 64); $name =~ s/'/''/g; $addr1 = substr($addr1, 0, 32); $city =~ s/'/''/g; $city = substr($city, 0, 32); $zipcode =~ s/'/''/g; $zipcode = substr($zipcode, 0, 10); $contact =~ s/'/''/g; $contact = substr($contact, 0, 64); $phone =~ s/'/''/g; $phone = substr($phone, 0, 20); $fax =~ s/'/''/g; $fax = substr($fax, 0, 20); print qq|INSERT INTO $type->{'table'} (id, name, address1, city, zipcode, contact, phone, fax, email, taxincluded, curr) VALUES ($id, '$name', '$addr1', '$city', '$zipcode', '$contact', '$phone', '$fax', '$email', $taxincluded, '$entry->{$type->{'short'}.":currency"}->{"cmdty:id"}');\n|; $tierid{$type->{'node'}}{$entry->{$type->{'short'}.":guid"}{"content"}} = $entry->{$type->{'short'}.":id"}; print qq|INSERT INTO $type->{'table'}tax SELECT $id,id FROM chart WHERE link LIKE '%CT_tax%';\n|; } } return \%tierid; } # # INVOICE CONVERSION # # invoice table => gnc:GncInvoice => [ { invoice:owner, ...:postacc, ... } ] # # I *think* the gnc:GncEntry is linked with the invoices. Unclear about exactly # book:slots is. # # On the SL side, there is the "invoice" table, but it doesn't record the owner # of the invoice. This information *might* be in the "oe" table. The entries # themselves might just be sitting in "ar" and "ap", which would be logical. # How to do this: # # 1- keep track of the id => number mapping in vendors/customers (./) # 2- find out how sql-ledger links customers and invoices (./) # 3- find all entries of an invoice (./) # 4- find out how sql-ledger links invoices and "entries" (./) # # HARD DATA about sql-ledger's schema: # - invoices are originally recorded in the 'ar' table, with the "customer_id" column doing the obvious link # - the amount of the bill is recorded in the ar table, as a simple cache # - payments are recorded in the acc_trans table, as regular transactions # - the "trans_id" column of the "acc_trans" table refers to the "ar" table instead of the "gl" table # - the "invoice" table holds the individual invoice "entries" # # Also consider this comment from Tony Fraser: # # Your looking for the "invoice" table. To limit your trigger to only POS # invoices you'll have to look in the "ar" table for a non NULL "till" # column. # # The relationship between the tables is "ar.id = invoice.trans_id". # # Message-Id: <1111535319.1884.63.camel@sybaws1.office.sybaspace.com> # # # gnucash column in 'ar' table # id id integer DEFAULT nextval('id'::text) # ~ invnumber text - making this the guid for future reference # posted transdate date DEFAULT ('now'::text)::date # = customer_id integer # those are caches for the individual entries recorded in the invoice table # ?? taxincluded boolean # ?? amount double precision # ?? netamount double precision # ?? paid double precision # ?? datepaid date # ?? duedate date # true? invoice boolean DEFAULT false - set when some entries are present in the invoice table # - shippingpoint text # - terms smallint # = notes text # currency curr character(3) # ? ordnumber text # - employee_id integer # ? till character varying(20) - recent post on that to the list, related to POS # ? quonumber text # - intnotes text # - department_id integer # - shipvia text # - language_code character varying(6) # opened used for posted if posted n/a # posttxn - the matching transaction(s?) # postlot - ? # postacc - ar or ap... # # things are a bit weird in gnucash, at least to me. There's postlot and # posttxn which seem to refer mostly to the same thing. I suspect that postlot # is simply a collector that takes care of tracking which "splits" belong to # the invoice, in case it gets "unposted". # # For posttxn, there we also have the gnc:transaction entries, which are linked # through posttxn, but also from the transaction itself, through a pointer back # to the invoice # # Transactions: # - backlinked from the GncInvoice, through the postlot # - those are the payments: they might already been imported above, in which # case we need to make sure they are properly mapped to the ap/ar tables # instead of gl. # sub parse_invoices { my $ref = shift(@_); my $tierid = shift(@_); my %lotid; my $table; my %invid = (); if (ref($ref->{"gnc:book"}->{"gnc:GncInvoice"}) ne "ARRAY") { $ref->{"gnc:book"}->{"gnc:GncInvoice"} = [ $ref->{"gnc:book"}->{"gnc:GncInvoice"} ]; } warn("invoice import not complete and completely supported, see source for details\n"); my %invoiceid = (); foreach $invoice (@{ $ref->{"gnc:book"}->{"gnc:GncInvoice"} }) { # assertion if (!$invoice->{"invoice:currency"}->{"cmdty:space"} eq "ISO4217") { warn("unknown currency space: ". $invoice->{"invoice:currency"}->{"cmdty:space"} . "\n"); } # shortcut $ref = $invoice->{"invoice:owner"}{"owner:id"}{"content"}; $type = $invoice->{"invoice:owner"}{"owner:type"}; if ($type eq "gncVendor") { $table = 'ap'; # Account Payables $tiercol = 'vendor_id'; } elsif ($type eq "gncCustomer") { $table = 'ar'; # Account Receivables $tiercol = 'customer_id'; } else { warn("unhandled invoice type $type in invoice $ref, skipping\n"); next; } # find date only if we're going to do something with this if ($invoice->{"invoice:active"} && $invoice->{"invoice:posted"}{"ts:date"}) { $dateposted = finddateposted($invoice->{"invoice:posted"}{"ts:date"}, $invoice->{"invoice:entered"}{"ts:date"}); } # not the same case here than in the customer nodes # here is gncCustomer, the other is GncCustomer $type = 'gnc:'.ucfirst($type); $notes = $invoice->{"invoice:notes"} || ""; if (ref($notes) eq "HASH") { $notes = $invoice->{"invoice:notes"}{"content"} || ""; } $notes =~ s/'/''/g; #' if ($invoice->{'invoice:postlot'}{'content'}) { $lotid{$invoice->{'invoice:postlot'}{'content'}} = $invoice->{"invoice:id"}; } if (!$invoice->{"invoice:active"} || !$invoice->{"invoice:posted"}{"ts:date"}) { warn("not importing inactive invoice $ref\n"); # if we ever import those, we need to comment out the if() around the finddateposted above #print qq|INSERT INTO oe (id, $tiercol, transdate, notes, curr, amount, netamount) VALUES ($invoice->{"invoice:id"}, $tierid->{$type}{$ref}, '$dateposted', '$notes', '$invoice->{"invoice:currency"}->{"cmdty:id"}', 0, 0);\n|; } else { # XXX: should be replaced by subqueries where appropriate $invid{$invoice->{"invoice:guid"}{'content'}} = [ $invoice->{"invoice:id"}, $table ]; print qq|INSERT INTO $table (id, invnumber, $tiercol, invoice, transdate, notes, curr, amount, netamount, paid) VALUES ($invoice->{"invoice:id"}, '$invoice->{"invoice:id"}', $tierid->{$type}{$ref}, TRUE, '$dateposted', '$notes', '$invoice->{"invoice:currency"}->{"cmdty:id"}', 0, 0, 0);\n|; } } return (\%lotid, \%invid); } # # INVOICE ENTRIES PARSING # # finally, to confuse it all, we have the GncEntry which looks exactly the same # as transactions. # # GncEntry: # - not backlinked direcly from anywhere # - b-acct pointing to an EXPENSE account # - entry:bill backlinked in a gnc:lot, which seems to list invoices or "stuff" # linked with an account. This is the same lot as the invoice's postlot. # - entry:bill backlink in a transaction's trn:slot, not sure if relevant # - entry:bill is obviously a link to the invoice # - holds the following info: # * date, date entered # * description # * action (unit?) # * qty # * i-acct # * price # * invoice # * i-disc-type, i-disc-how (discounts??) # * i-taxable # * i-taxincluded # # there doesn't seem to be a matching gncentry for each transaction, and vice-versa # # there are many gncentry per invoice. those are the invoice entries # # gncentry will probably map into entries in the invoice table # in invoice, there is a negative 'qty' for pauables. similar for acc_trans # entries: the sum is negative or positive depending on wheteer this is a ar or # ap reference. # sub parse_entries { my $ref = shift(@_); my $invid = shift(@_); my $taxes = shift(@_); my $options = shift(@_); my $acccid = shift(@_); print DEBUG Dumper($taxes) if $opts{'d'}; $taxtable = $options->{"Default Vendor TaxTable"} || undef; if (!defined($taxtable)) { $taxtable = $options->{"Default Customer TaxTable"}; } $parts_id = 0; foreach $entry (@{ $ref->{"gnc:book"}->{"gnc:GncEntry"} }) { print DEBUG Dumper($entry) if $opts{'d'}; if (defined($entry->{'entry:i-price'})) { $price = compute_amount($entry->{'entry:i-price'}); $vendor_fact = 1; } elsif (defined($entry->{'entry:b-price'})) { $price = compute_amount($entry->{'entry:b-price'}); $vendor_fact = -1; } else { warn ("no price found in entry ". $entry->{"entry:guid"}{"content"} .", skipping\n"); next; } $qty = compute_amount($entry->{'entry:qty'}) * $vendor_fact; if (defined($entry->{'entry:bill'}{'content'})) { $invoice = $entry->{'entry:bill'}{'content'}; } else { $invoice = $entry->{'entry:invoice'}{'content'}; } $description = $entry->{'entry:description'}; $description =~ s/'/''/g; #' # discounts $discount = 0; $sellprice = $fxsellprice = $price; if (defined($entry->{'entry:i-discount'})) { $discount = compute_amount($entry->{'entry:i-discount'}); if ($entry->{'entry:i-disc-type'} eq 'PERCENT') { $discount *= 0.01; # stored in X% in gnucash (!!) $sellprice -= $price * $discount; $fxsellprice -= $price * $discount; } elsif ($entry->{'entry:i-disc-type'} eq 'VALUE') { $sellprice -= $discount; $fxsellprice -= $discount; warn("VALUE discount: $discount, price: $price, possible precision loss\n"); $discount /= $price; } } # the ugly taxtable finding algorithm # setup defaults $taxtable = undef; $taxinc = 0; $taxedprice = $sellprice; # we default to tax included/no tax if ($entry->{'entry:i-taxable'} || $entry->{'entry:b-taxable'}) { if ($entry->{'entry:i-taxincluded'}) { $taxinc = $entry->{'entry:i-taxincluded'}; } elsif ($entry->{'entry:b-taxincluded'}) { $taxinc = $entry->{'entry:b-taxincluded'}; } if (defined($entry->{'entry:i-taxtable'}{'content'})) { $taxtable = $entry->{'entry:i-taxtable'}{'content'}; } elsif (defined($entry->{'entry:b-taxtable'}{'content'})) { $taxtable = $entry->{'entry:b-taxtable'}{'content'}; } if (defined($taxtable)) { $rate = $taxes->{$taxtable}; } else { $rate = 1; } if ($taxinc) { $taxedprice = $sellprice; $sellprice /= $rate; # deduce the original price } else { $taxedprice = $sellprice * $rate; } } $parts_id++; $unit = substr($entry->{'entry:action'} || "", 0, 5); $unit =~ s/'/''/g; #' if (defined($entry->{'entry:i-acct'}{'content'})) { $inc_account = $accid->{$entry->{'entry:i-acct'}{'content'}}; } else { $inc_account = "NULL"; } if (defined($entry->{'entry:b-acct'}{'content'})) { $exp_account = $accid->{$entry->{'entry:b-acct'}{'content'}}; } else { $exp_account = "NULL"; } $trans_id = sprintf("%06d", $invid->{$invoice}[0]); print qq|INSERT INTO parts (id, partnumber, description, income_accno_id, expense_accno_id, unit, listprice, sellprice, lastcost, weight, onhand, notes) VALUES ($parts_id, $parts_id, '$description', $inc_account, $exp_account, '', 0, 0, 0, 0, 0, '');|; if ($sellprice != $taxedprice) { print qq|INSERT INTO partstax SELECT $parts_id,id FROM chart WHERE link LIKE '%CT_tax%';\n|; } print qq|INSERT INTO invoice (trans_id, parts_id, sellprice, fxsellprice, qty, description, allocated, discount, unit) VALUES ($trans_id, $parts_id, $sellprice, $fxsellprice, $qty, '$description', 0, $discount, '$unit');\n|; if ($qty < 0) { $table = "ap"; $mul = -1; # necessary because the vendor bills were reversed } else { $table = "ar"; $mul = 1; } $total_price = $qty * $sellprice * $mul; $total_tax = $qty * $taxedprice * $mul; print qq|UPDATE $table SET amount = amount + $total_tax, netamount = netamount + $total_price WHERE id = $trans_id;\n|; } } # # TAX TABLES CONVERSIONS # # returns taxtable guid => rate # # where the rate is the product of all the table's tax entries sub parse_taxes { my $ref = shift(@_); my $accid = shift(@_); my %taxes = (); foreach $entry (@{ $ref->{"gnc:book"}->{"gnc:GncTaxTable"} }) { print DEBUG Dumper($entry) if $opts{'d'}; $entries = $entry->{'taxtable:entries'}->{'gnc:GncTaxTableEntry'}; if (ref($entries) ne "ARRAY") { $entries = [ $entries ]; } $taxnumber = $entry->{'taxtable:guid'}{'content'}; $taxes{$taxnumber} = 1; foreach $tx (@{ $entries }) { $rate = compute_amount($tx->{'tte:amount'}) * 0.01; if (defined($tx->{'tte:acct'}) && defined($accid->{$tx->{'tte:acct'}{'content'}})) { $chart_id = $accid->{$tx->{'tte:acct'}{'content'}}; } else { warn("inconsistency detected: invalid or no account for taxtable ".$entry->{"taxtable:guid"}{"content"}.", skipping\n"); next; } $taxes{$taxnumber} += $rate; if (!$entry->{'taxtable:invisible'}) { print qq|INSERT INTO tax (chart_id, rate, taxnumber) VALUES ((SELECT id FROM chart WHERE accno = '$chart_id'), $rate, NULL);\n|; } } } return \%taxes; } # # OPTIONS CONVERSION # # returns a name => value hash of the options sub parse_options { my $ref = shift(@_); my %options = (); foreach $slot (@{$ref->{"gnc:book"}{"book:slots"}{"slot"}}) { if ($slot->{"slot:key"} eq "options") { if (ref($slot->{"slot:value"}{"slot"}) ne "ARRAY") { $slot->{"slot:value"}{"slot"} = [ $slot->{"slot:value"}{"slot"} ]; } foreach $opts (@{ $slot->{"slot:value"}{"slot"} }) { if ($opts->{"slot:key"} eq "Business") { foreach $vals (@{ $opts->{"slot:value"}{"slot"} }) { $options{$vals->{"slot:key"}} = $vals->{"slot:value"}{"content"}; } } } } } return \%options; } # # OTHER STUFF # # we could examine the ->{"version"} to see if it's 2.0.0 # # there is an interesting field: gnc:count-data # # see also: http://gnucash.org/images/diagrams/structures.png # # This might be just a XSLT: # # http://www.gnucash.org/docs/v1.8/C/gnucash-guide/appendixa_xmlconvert1.html $opts{'d'} && close(DEBUG); # vi: noexpandtab sw=4