#!/usr/bin/perl # create_input_sql.pl --> Create PostgreSQL sql statements from # .csv file for products table, and put them in a file called # runme.postgresql. # by Manni Wood use strict; my $csv_file_name = $ARGV[0]; unless ($csv_file_name) { print STDERR "Please enter the name of a CSV file.\n"; print STDERR "Aborting.\n"; exit 1; } my $output_file_name = "runme.postgresql"; open (IN, $csv_file_name) || die "Could not read \"$csv_file_name\": $!"; open (OUT, ">$output_file_name") || die "Could not write \"$output_file_name\": $!"; # The first line of this file is the field names, which we do *not* # want to insert into the database. So read and discard the first line. $_ = ; # Instructions to psql to stop on the first error. print OUT "\\set ON_ERROR_STOP 1\n"; # Do this all as one transaction. print OUT "begin;\n"; while () { # Blank lines can creep into .csv files. Skip them. next if (m/^\s*$/); chomp; my ($id, $name, $description, $price) = parse_csv($_); # Make all fields SQL-friendly $id = nullify_field($id); $name = quote_or_nullify_field($name); $description = quote_or_nullify_field($description); $price = nullify_field($price); print OUT "insert into products (id, name, description, price) values (" . $id . ", " . $name . ", " . $description . ", " . $price . ");\n"; } print OUT "commit;\n"; close (OUT); close (IN); sub parse_csv { # The goal here is to just go ahead and split on commas, # and then find chunks that start with a " and assume that we've # broken apart a field containing commas; re-join the chunks to # the chunk beginning with " until we find a chunk ending with ". my @chunks; my $chunk; my @fields; my $field; my $line = shift; @chunks = split(/,/, $line); my $i; for ($i = 0; $i <= $#chunks; ++$i) { $chunk = $chunks[$i]; $field = $chunk; # If chunk starts wtih a double-quote but does not end with one, if (substr($chunk, 0, 1) eq '"' && substr($chunk, -1, 1) ne '"') { # join the current chunk with the next chunk, replacing the # comma that got eliminated during the split on commas ++$i; $chunk = $chunks[$i]; $field .= "," . $chunk; # and continue to do so until you find a chunk that ends with " # or you have run out of chunks. while (substr($chunk, -1, 1) ne '"' && $i <= $#chunks) { ++$i; $chunk = $chunks[$i]; $field .= "," . $chunk; } # Our goal is to get rid of all field delimitors, so # get rid of the leading double-quote $field = substr($field, 1); # and the trailing double-quote. chop($field); } # If the chunk starts and ends with a double-quote, if (substr($chunk, 0, 1) eq '"' && substr($chunk, -1, 1) eq '"') { # get rid of the leading double-quote $field = substr($field, 1); # and the trailing double-quote. chop($field); } # In CSV files, double quotes are escaped by doubling them up, # so un-double them. $field =~ s/""/"/g; # Finally, we have a field that is completely usable, so add it to the # array of fields we will return at the end of this subroutine. push(@fields, $field); # Clear the field for re-use in the next iteration of this loop. $field = ""; } return @fields; } sub nullify_field { # For SQL fields that do not need to be wrapped in single quotes. # If a field is empty, replace it with the string "null", # which can safely be used in sql insert statements. my $field = $_[0]; if ($field) { return $field; } else { return "null"; } } sub quote_or_nullify_field { # For SQL fields that need to be wrapped in single quotes. # If a field is empty, replace it with the string "null", # which can safely be used in sql insert statements. # If it is not empty, replace any single quote with double single # quotes (that's how you escape them in SQL) and wrap the whole thing # in single quotes (for use in a SQL insert statement). my $field = $_[0]; if ($field) { # Remove backslashes: they cause havoc in SQL. $field =~ s/\\//g; # Repeat single quotes: this is how they are escaped in SQL. $field =~ s/'/''/g; $field = "'" . $field . "'"; } else { $field = "null"; } }