]> Frank Brehm's Git Trees - my-stuff/sql-parse.git/commitdiff
Modul Text::Parse::SQL verwendungsfähig gemacht
authorFrank Brehm <frank@brehm-online.com>
Thu, 7 May 2009 12:58:20 +0000 (12:58 +0000)
committerFrank Brehm <frank@brehm-online.com>
Thu, 7 May 2009 12:58:20 +0000 (12:58 +0000)
git-svn-id: http://svn.brehm-online.com/svn/my-stuff/sql-parse/trunk@32 ec8d2aa5-1599-4edb-8739-2b3a1bc399aa

lib/Text/Parse/SQL.pm [new file with mode: 0644]

diff --git a/lib/Text/Parse/SQL.pm b/lib/Text/Parse/SQL.pm
new file mode 100644 (file)
index 0000000..d925329
--- /dev/null
@@ -0,0 +1,252 @@
+package Text::Parse::SQL;
+
+# $Id$
+# $URL$
+
+
+=head1 NAME
+
+Text::Parse::SQL: a module for parsing SQL data (normally the contents of a SQL file)
+                  to give back a series of SQL-Statements.
+
+=head1 DESCRIPTION
+
+This module provides a simple way to parse SQL data or a SQL file.
+
+This parsing removes all line comments ('-- ...') and, if the property 'remove_block_comments'
+is set, all block comments ('/* ... */').
+
+All quotings will preserved.
+
+=head1 SYNOPSIS
+
+  use Text::Parse::SQL;
+
+  my $parser = new Text::Parse::SQL( 'terminator' => ';', 'remove_block_comments' => 1 );
+  my $statements = $parser->parse_file( 'create_db.sql' );
+
+=cut
+
+use 5.8.0;
+use strict;
+use warnings;
+
+use Carp qw( cluck croak confess);
+use Class::Std;
+use Readonly;
+use Params::Util qw( _SCALAR _STRING );
+
+my $Revis = <<'ENDE';
+    $Revision$
+ENDE
+$Revis =~ s/^.*:\s*(\S+)\s*\$.*/$1/s;
+our $VERSION = "0.1." . $Revis;
+
+=head1 PROPERTIES
+
+All properties could be set in the new constuctor, could be set later
+via $parser->set_PROPERTY($new_property) and could be queried
+via $parser->get_PROPERTY().
+
+=cut
+
+{
+
+=head2 terminator
+
+The terminator of a SQL statement. In most cases ';'.
+
+=cut
+
+       my %terminator_of : ATTR( :get<terminator> :set<terminator> );
+
+=head2 remove_block_comments
+
+Should block comments be removed? Sometime they are a valid
+part of a SQL statement (e.g. Oracle hints).
+
+Per default they are not removed.
+
+=cut
+
+       my %remove_block_comments_of : ATTR( :get<remove_block_comments> :set<remove_block_comments> );
+
+=head2 quote_chars
+
+All valid quoting characters.
+
+Per default: ' " and `
+
+=cut
+
+       my %quote_chars_of : ATTR( :get<quote_chars> :set<quote_chars> );
+
+       #----------------------------------------------------------------------------------------
+
+=head1 METHODS
+
+Alls methods.
+
+=head2 BUILD( )
+
+Will automatically called after creating the object with new();
+
+=cut
+
+       sub BUILD {
+
+               my ( $self, $ident, $args_ref ) = @_;
+
+               $terminator_of{$ident} = ';';
+               $remove_block_comments_of{$ident} = 0;
+               $quote_chars_of{$ident} = '\'"`';
+
+               return 1;
+
+       }
+
+       #----------------------------------------------------------------------------------------
+
+=head2 START( )
+
+Will also automatically called after creating the object with new(), but after BUILD().
+
+=cut
+
+       sub START {
+
+               my ( $self, $ident, $args_ref ) = @_;
+
+               $terminator_of{$ident} = $args_ref->{'terminator'} if $args_ref->{'terminator'};
+               $remove_block_comments_of{$ident} = $args_ref->{'remove_block_comments'} if exists $args_ref->{'remove_block_comments'};
+               $quote_chars_of{$ident} = $args_ref->{'quote_chars'} if $args_ref->{'quote_chars'};
+
+
+       }
+
+       #----------------------------------------------------------------------------------------
+
+=head2 DEMOLISH( )
+
+Will automatically called before destroying the object.
+
+=cut
+
+       sub DEMOLISH {
+
+               my ( $self, $ident ) = @_;
+
+
+       }
+
+}
+
+#----------------------------------------------------------------------------------------
+
+=head2 parse( $data )
+
+The main procedure of this module. It parses the given data and gives back a reference
+to an array of SQL statements. The terminators of the SQL statements are removed.
+
+On error it returns undef.
+
+=cut
+
+sub parse {
+
+       my ( $self, $data ) = @_;
+
+       return undef unless _STRING($data);
+
+       my $result = [];
+
+       my $cur_sql = '';
+       my $terminator = $self->get_terminator();
+       my $quote_chars = $self->get_quote_chars();
+       my $cur_quote_char = undef;
+
+       my @QChars = map { quotemeta($_) } split( //, $quote_chars );
+
+       $data .= "\n" unless $data =~ /\n$/;
+
+       while ( $data ) {
+
+               #print $data;
+
+               #sleep 1;
+
+               # first remove comments of type '-- ...'
+               if ( $data =~ /^\s*--(?:\s[^\n]*)?\n/ ) {
+                       #print "Remove comments ...\n";
+                       $data =~ s/^\s*--(?:\s[^\n]*)?\n//;
+                       next;
+               }
+
+               # remove heading whitespaces before the first statement
+               if ( $data =~ /^\s+/ and not $cur_sql ) {
+                       #print "Remove heading whitespaces ...\n";
+                       $data =~ s/^\s+//;
+                       next;
+               }
+
+               # Remove block comments, if necessary
+               if ( $self->get_remove_block_comments() and $data =~ m|/\*.*?\*/|s ) {
+                       $data =~ s|/\*.*?\*/||s;
+                       next;
+               }
+
+               # move all characters except terminators and quoting characters
+               # to the current sql statement
+               if ( $data =~ /^[^\Q$terminator$quote_chars\E]+/ ) {
+                       #print "Move characters except terminators and quoting characters ...\n";
+                       $data =~ s/^([^\Q$terminator$quote_chars\E]+)//;
+                       $cur_sql .= $1;
+                       next;
+               }
+
+               # remove terminator, start a new statement, save the old statement
+               if ( $data =~ /^\s*[$terminator]\s*/ ) {
+                       #print "Remove terminator ...\n";
+                       $data =~ s/^\s*[$terminator]\s*//;
+                       if ( $cur_sql ) {
+                               $cur_sql =~ s/^\s+//;
+                               $cur_sql =~ s/\s+$//;
+                               push @$result, $cur_sql if $cur_sql;
+                       }
+                       $cur_sql = '';
+                       next;
+               }
+
+               # move quotet text to the current sql statement
+               for my $c (@QChars) {
+                       if ( $data =~ /^$c[^$c]*$c/ ) {
+                               #print "Move quotet text ...\n";
+                               $data =~ s/^($c[^$c]*$c)//;
+                               $cur_sql .= $1;
+                               next;
+                       }
+               }
+
+       }
+
+       
+       if ( $cur_sql ) {
+               $cur_sql =~ s/^\s+//;
+               $cur_sql =~ s/\s+$//;
+               push @$result, $cur_sql if $cur_sql;
+       }
+
+       return $result;
+
+}
+
+
+#----------------------------------------------------------------------------------------
+
+1;
+
+#----------------------------------------------------------------------------------------
+
+__END__
+
+# vim: noai : ts=4 fenc=utf-8 filetype=perl :