#!/usr/bin/perl

# create_input_sql.pl --> Create PostgreSQL sql statements from
# .csv file for products table, and put them in a file called
# runme.postgresql.

# by Manni Wood

use strict;

my $csv_file_name = $ARGV[0];
unless ($csv_file_name) {
    print STDERR "Please enter the name of a CSV file.\n";
    print STDERR "Aborting.\n";
    exit 1;
}
my $output_file_name = "runme.postgresql";

open (IN, $csv_file_name) || die "Could not read \"$csv_file_name\": $!";
open (OUT, ">$output_file_name") || die "Could not write \"$output_file_name\": $!";
# The first line of this file is the field names, which we do *not*
# want to insert into the database. So read and discard the first line.
$_ = <IN>;

# Instructions to psql to stop on the first error.
print OUT "\\set ON_ERROR_STOP 1\n";
# Do this all as one transaction.
print OUT "begin;\n";

while (<IN>) {
    # Blank lines can creep into .csv files. Skip them.
    next if (m/^\s*$/);
    chomp;
    my ($id, $name, $description, $price) = parse_csv($_);

    # Make all fields SQL-friendly
    $id = nullify_field($id);
    $name = quote_or_nullify_field($name);
    $description = quote_or_nullify_field($description);
    $price = nullify_field($price);

    print OUT "insert into products (id, name, description, price) values (" .
    $id . ", " .
    $name . ", " .
    $description . ", " .
    $price . ");\n";
}
print OUT "commit;\n";
close (OUT);
close (IN);

sub parse_csv {
    # The goal here is to just go ahead and split on commas,
    # and then find chunks that start with a " and assume that we've
    # broken apart a field containing commas; re-join the chunks to
    # the chunk beginning with " until we find a chunk ending with ".
    my @chunks;
    my $chunk;
    my @fields;
    my $field;
    my $line = shift;

    @chunks = split(/,/, $line);

    my $i;
    for ($i = 0; $i <= $#chunks; ++$i) {
        $chunk = $chunks[$i];
        $field = $chunk;
        # If chunk starts wtih a double-quote but does not end with one,
        if (substr($chunk, 0, 1) eq '"' && substr($chunk, -1, 1) ne '"') {
            # join the current chunk with the next chunk, replacing the
            # comma that got eliminated during the split on commas
            ++$i;
            $chunk = $chunks[$i];
            $field .= "," . $chunk;
            # and continue to do so until you find a chunk that ends with "
            # or you have run out of chunks.
            while (substr($chunk, -1, 1) ne '"' && $i <= $#chunks) {
                ++$i;
                $chunk = $chunks[$i];
                $field .= "," . $chunk;
            }
            # Our goal is to get rid of all field delimitors, so
            # get rid of the leading double-quote
            $field = substr($field, 1);
            # and the trailing double-quote.
            chop($field);
        }
        # If the chunk starts and ends with a double-quote,
        if (substr($chunk, 0, 1) eq '"' && substr($chunk, -1, 1) eq '"') {
            # get rid of the leading double-quote
            $field = substr($field, 1);
            # and the trailing double-quote.
            chop($field);
        }
        # In CSV files, double quotes are escaped by doubling them up,
        # so un-double them.
        $field =~ s/""/"/g;
        # Finally, we have a field that is completely usable, so add it to the
        # array of fields we will return at the end of this subroutine.
        push(@fields, $field);
        # Clear the field for re-use in the next iteration of this loop.
        $field = "";
    }
    return @fields;
}

sub nullify_field {
    # For SQL fields that do not need to be wrapped in single quotes.
    # If a field is empty, replace it with the string "null",
    # which can safely be used in sql insert statements.
    my $field = $_[0];
    if ($field) {
        return $field;
    } else {
        return "null";
    }
}

sub quote_or_nullify_field {
    # For SQL fields that need to be wrapped in single quotes.
    # If a field is empty, replace it with the string "null",
    # which can safely be used in sql insert statements.
    # If it is not empty, replace any single quote with double single
    # quotes (that's how you escape them in SQL) and wrap the whole thing
    # in single quotes (for use in a SQL insert statement).
    my $field = $_[0];
    if ($field) {
        # Remove backslashes: they cause havoc in SQL.
        $field =~ s/\\//g;
        # Repeat single quotes: this is how they are escaped in SQL.
        $field =~ s/'/''/g;
        $field = "'" . $field . "'";
    } else {
        $field = "null";
    }
}


