"""
$URL: svn+ssh://svn.mikewatkins.ca/repo/trunk/parlez/lib/pgsql_storage.py $
$Id: pgsql_storage.py 43 2007-09-12 23:25:01Z mw $

PostgresqlStorage isn't a speed demon compared to native file system storages,
but it holds up very well compared to sqlite when the record count is large
(millions of records). Its industrial-strength design and proven track record
with many concurrent connections might make it a more comfortable fit for some
applications.

This variant of PostgresqlStorage is for the pgsql DBAPI driver only. pgsql
provides access to Postgresql's very efficient COPY functionality (used in the
bulk_load_records utility function). Get it:

    http://pypi.python.org/pypi/python-pgsql/
    http://people.rpath.com/~gafton/pgsql

Usage:
    import pgsql
    sql_connection = pgsql.connect(database='test')
    connection = Connection(PostgresqlStorage(sql_connection))
    root = connection.get_root()
    ...
"""
from durus.serialize import unpack_record
from durus.utils import int8_to_str, str_to_int8
try:
    from parlez.dbapi_storage import DBAPIStorage # my as-yet unreleased library
except ImportError:
    from dbapi_storage import DBAPIStorage

class PostgresqlStorage(DBAPIStorage):
    """
    Provides a Postgresql storage backend for Durus. Requires a
    DBAPI-compatible Postgresql interface; after evaluating several DBAPI
    interfaces, at this time only psycopg2 and pgsql are supported. pgsql is
    preferred for what it offers that psycopg2 does not: true server-side
    cursors, a correct executemany implementation, prepared statements, and a
    convenient connection.bulkload() method for accessing Postgresql's
    extremely fast COPY functionality for importing data. For everyday
    operations pgsql is faster, provided you make use of prepared statements.

    As of 2007-08-24 this storage should be considered experimental and unproven.

    This PostgresqlStorage variant is for pgsql *only*. See psycopg2_storage
    for a variant suitable for that dbapi driver.
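
    The prepared-statement pattern this class relies on (a sketch; pgsql's
    prepare() returns a statement object that executes like a cursor):

        stmt = sql_connection.prepare(
            'SELECT record FROM durus_records WHERE p_oid = $1')
        row = stmt.execute((0,)).fetchone()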
    """
    def __init__(self, sql_connection, table_name='durus_records', **kwargs):
        DBAPIStorage.__init__(self, sql_connection, table_name=table_name, **kwargs)
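        # Prepared once per connection; $1, $2 are Postgresql's native
        # positional placeholders for prepared statements.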
        self._sql_prepared_queries = dict(
            new_oid = self._sql_connection.prepare(
                'INSERT INTO %s DEFAULT VALUES RETURNING p_oid' % self._sql_table_name),
            load = self._sql_connection.prepare(
                'SELECT record FROM %s WHERE p_oid = $1 and record is not NULL' %\
                self._sql_table_name),
            end = self._sql_connection.prepare(
                'UPDATE %s set record=$1, pack=0 WHERE p_oid = $2' % self._sql_table_name),
            )

    def _sql_ensure_tables(self):
        """
        Not portable at all: relies on pg_tables, SERIAL and ALTER SEQUENCE.
        """
        def tables_exist():
            result = self._sql_queryone(
                """
                SELECT tablename from pg_tables
                WHERE schemaname='public'
                AND tablename=$1
                """, (self._sql_table_name,))
            return bool(result and result[0] == self._sql_table_name)
        def create_tables():
            self._sql_execute(
                """
                CREATE TABLE %s
                    (p_oid SERIAL PRIMARY KEY,
                     pack INTEGER DEFAULT 0,
                     record BYTEA)
                """ % (self._sql_table_name,))
            # SERIAL statement forces creation of a SEQUENCE <tablename>_p_oid_seq
            # Dropping table drops SERIAL created sequence; by default the serial
            # numbering system starts at 1; reset it to start at 0 for ROOT_OID:
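            # (Durus's ROOT_OID is oid 0, i.e. int8_to_str(0).)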
            self._sql_execute(
                """
                ALTER SEQUENCE %s_p_oid_seq minvalue 0 restart with 0
                """ % (self._sql_table_name,))
            # pack on large tables will benefit from:
            self._sql_execute(
                """
                CREATE INDEX %s_pack ON %s (pack)
                """ % (self._sql_table_name, self._sql_table_name))
        if not tables_exist():
            create_tables()

    def new_oid(self):
        """() -> oid:str
        Return an unused oid.
        """
        cursor = self._sql_prepared_queries.get('new_oid')
        return int8_to_str(cursor.execute().fetchone()[0])

    def load(self, oid):
        """(oid:str) -> record:str

        Overrides load() to take advantage of pgsql's prepared-statement support.
        """
        cursor = self._sql_prepared_queries.get('load')
        cursor.execute((str_to_int8(oid),))
        result = cursor.fetchone()
        if result:
            return result[0]
        else:
            raise KeyError, 'oid %s not in storage' % str_to_int8(oid)

    def end(self, handle_invalidations=None):
        """
        Concludes a Durus commit.
        """
        def gen_transaction():
            for oid, record in self.transaction.iteritems():
                yield (record, str_to_int8(oid))
        cursor = self._sql_prepared_queries.get('end')
        cursor.executemany(gen_transaction())
        self.transaction = None
        self._sql_connection.commit()

    def get_size(self):
        """() -> integer

        Uses the Postgresql pg_class system table; the answer is only an
        estimate that drifts as rows are inserted and deleted, and is
        refreshed when VACUUM, ANALYZE or CLUSTER runs.

        The estimate suffices because the number is informational, not
        critical. The real-time alternative, SELECT count(*), does a full
        table scan because of Postgresql's MVCC design.
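
        To refresh the estimate by hand (assuming the default table name):

            ANALYZE durus_records;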
        """
        result = self._sql_queryone(
            "SELECT reltuples from pg_class where relname = '%s'" % self._sql_table_name)
        return int(result[0])


def bulk_load_records(records, target_storage):
    """(records:sequence(str), target_storage:PostgresqlStorage) -> None

    Bulk load records into a PostgresqlStorage, preserving existing object ids.
    Raises SystemError if ANY records other than root exist in the target
    storage.

    WARNING! ALL existing records in the target, including root, will be
    deleted!
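
    Usage (a sketch; assumes a Durus FileStorage as the source and that its
    gen_oid_record() yields (oid, record) pairs, as durus.file_storage's does):

        import pgsql
        from durus.file_storage import FileStorage
        source = FileStorage('source.durus')
        target = PostgresqlStorage(pgsql.connect(database='test'))
        bulk_load_records(
            (record for oid, record in source.gen_oid_record()), target)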
    """
    assert isinstance(target_storage, PostgresqlStorage)
    sqldb = target_storage._sql_connection
    cursor = sqldb.cursor()
    def check_target_storage():
        cursor.execute('SELECT count(p_oid) FROM %s WHERE p_oid != 0' %\
                       (target_storage._sql_table_name,))
        result = cursor.fetchone()
        if result and result[0] > 0:
            raise SystemError, (
                'No records, other than root, may exist in the target storage. '
                'WARNING: everything in the target, root included, will be replaced.')
        cursor.execute('DELETE FROM %s' % target_storage._sql_table_name)
        cursor.execute('ALTER SEQUENCE %s_p_oid_seq restart with 0' %\
                       (target_storage._sql_table_name,))
        sqldb.commit()
    def gen_oid_record():
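        # unpack each record only to recover its oid; the raw record string
        # is what COPY loads into the record column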
        for record in records:
            oid, data, refdata = unpack_record(record)
            oid = str_to_int8(oid)
            yield (oid, record)
    check_target_storage()
    # pgsql exposes Postgresql's extremely efficient COPY functionality:
    sqldb.bulkload(target_storage._sql_table_name, gen_oid_record(), ('p_oid', 'record'))
    # bulkload uses COPY, which does not advance the p_oid sequence,
    # so update the sequence manually:
    cursor.execute('SELECT max(p_oid) from %s' % (target_storage._sql_table_name,))
    max_oid = cursor.fetchone()[0]
    assert max_oid is not None
    cursor.execute(
        """
        ALTER SEQUENCE %s_p_oid_seq restart with %d
        """ % (target_storage._sql_table_name, max_oid+1))
    sqldb.commit()

