###############################################################################
## ##
## ALEXANDRIA DIGITAL LIBRARY ##
## University of California at Santa Barbara ##
## ##
## ------------------------------------------------------------------------- ##
## ##
## Copyright (c) 2004 by the Regents of the University of California ##
## All rights reserved ##
## ##
## Redistribution and use in source and binary forms, with or without ##
## modification, are permitted provided that the following conditions are ##
## met: ##
## ##
## 1. Redistributions of source code must retain the above copyright ##
## notice, this list of conditions, and the following disclaimer. ##
## ##
## 2. Redistributions in binary form must reproduce the above copyright ##
## notice, this list of conditions, and the following disclaimer in ##
## the documentation and/or other materials provided with the ##
## distribution. ##
## ##
## 3. All advertising materials mentioning features or use of this ##
## software must display the following acknowledgement: This product ##
## includes software developed by the Alexandria Digital Library, ##
## University of California at Santa Barbara, and its contributors. ##
## ##
## 4. Neither the name of the University nor the names of its ##
## contributors may be used to endorse or promote products derived ##
## from this software without specific prior written permission. ##
## ##
## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY ##
## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ##
## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE ##
## DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ##
## ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ##
## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ##
## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ##
## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ##
## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ##
## ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ##
## POSSIBILITY OF SUCH DAMAGE. ##
## ##
###############################################################################
# $Header: /export/home/gjanee/bucket99/RCS/UniversalTranslator.py,v 1.11 2004/02/24 22:42:21 gjanee Exp $
# SYNOPSIS
#
# class Translator (buckets, externalIdTable=None, optimizerHint=None,
# rewriteSetOpsAsSubqueries=0)
#
# An object that holds the information needed to translate
# constraints. It is initialized with:
#
# buckets
# A dictionary that maps the names of supported buckets
# (e.g., "geographic-locations") to Bucket objects.
#
# externalIdTable
# Optionally, an ExternalIdTable object that describes a
# many-to-one table that maps internal identifiers (the
# identifiers used to join tables internally) to external
# identifiers (the identifiers that are ultimately
# returned).
#
# optimizerHint
# Optionally, an optimizer hint, e.g.,
# "/*+ FIRST_ROWS(1000) */". The hint is placed
# immediately after the SELECT keyword in each outermost
# SELECT clause.
#
# rewriteSetOpsAsSubqueries
# A boolean that indicates if INTERSECT and EXCEPT set
# operations should be rewritten using subqueries. The
# default is false. If true, the returned query will be
# either a single SELECT statement or a simple UNION of
# two or more SELECT statements (and therefore in
# disjunctive normal form).
#
# def Translator.translate (constraint, vocabularies)
#
# Translates a constraint. Returns either an SQL query as a
# string or an edu.ucsb.adl.middleware.MiddlewareException
# object.
#
# constraint
# A constraint (specifically, an
# edu.ucsb.adl.middleware.Query.Constraint object) to be
# translated to SQL.
#
# vocabularies
# The vocabularies (specifically, an array of
# edu.ucsb.adl.bucket99.Vocabulary objects) to be used in
# the translation.
#
# DESCRIPTION
#
# Universal query translator. This translator is intended to be
# referenced from the query translation script proper in the
# stylized manner shown below:
#
# import UniversalTranslator
# UT = UniversalTranslator
#
# import paradigms
# P = paradigms
#
# buckets = { ... }
#
# translator = UT.Translator(buckets)
#
# def translate ():
# return translator.translate(constraint, vocabularies)
#
# 'buckets' is a dictionary that maps the names of supported
# buckets to Bucket objects. A Bucket object describes key
# characteristics and the translation mechanism of a bucket. It
# is initialized with three arguments: a bucket type (which must
# be "spatial", "temporal", "hierarchical", "textual",
# "identification", "numeric", or "relational"); a list of the
# operators the bucket supports; and a Paradigm object that
# implements query translation on behalf of the bucket. For
# example:
#
# buckets = {
# "resolution" :
# UT.Bucket(
# "numeric",
# UT.standardNumericOperators,
# P.Numeric_Straight("mytable", "id",
# "res_meters", {}, UT.Cardinality("1")))
# }
#
# The above example describes a numeric bucket named "resolution"
# that supports the standard numeric operators (see below). The
# bucket is implemented by the Numeric_Straight paradigm, which
# translates numeric constraints to straight SQL numeric
# column/value comparisons. The arguments to the paradigm are
# specific to the paradigm, and are described in the paradigm's
# documentation. The ultimate effect of the particular
# declaration above is that a constraint
#
#
# resolution
# less-than
# 3.5
#
#
# is translated to
#
# SELECT A.id FROM mytable A WHERE A.res_meters < 3.5
#
# There are many paradigms; they are located in the 'paradigms'
# directory.
#
# For convenience, the universal translator defines the following
# lists of standard operators:
#
# UT.standardSpatialOperators =
# ["contains", "is-contained-in", "overlaps"]
#
# UT.standardTemporalOperators =
# ["contains", "is-contained-in", "overlaps"]
#
# UT.standardHierarchicalOperators =
# ["is-a"]
#
# UT.standardTextualOperators =
# ["contains-all-words", "contains-any-words",
# "contains-phrase"]
#
# UT.extendedTextualOperators =
# UT.standardTextualOperators + ["equals", "matches-pattern"]
#
# UT.standardIdentificationOperators =
# ["matches"]
#
# UT.standardNumericOperators =
# ["equals", "not-equals", "less-than",
# "less-than-or-equal-to", "greater-than",
# "greater-than-or-equal-to"]
#
# (There are no standard relational operators; instead, the
# relations themselves are treated as operators.)
#
# As noted above, the Translator object can optionally be
# initialized with an ExternalIdTable object that describes a
# table that maps internal identifiers to external identifiers.
# The object has the structure
#
# ExternalIdTable (table, internalIdColumn, externalIdColumn)
#
# For example, a configuration
#
# translator = UT.Translator(buckets,
# UT.ExternalIdTable("id_map", "int_id", "ext_id"))
#
# will cause the translator to convert the nominal query
#
# SELECT A.id FROM mytable A WHERE A.res_meters < 3.5
#
# to
#
# SELECT A.ext_id FROM id_map A, mytable B WHERE
# A.int_id = B.id AND B.res_meters < 3.5
#
# FIELD ADAPTORS
#
# A field adaptor is a special kind of paradigm that adds support
# for field-level searching to a paradigm (or set of paradigms)
# that doesn't otherwise support it. Adaptors are located in the
# 'paradigms' directory.
#
# PARADIGMS
#
# The remainder of this documentation describes the universal
# translator's paradigm interface.
#
# A paradigm is a subclass of the Paradigm abstract class; it
# should implement a particular technique of constraint
# translation. By convention, each paradigm is stored in a
# separate file by the same name in the 'paradigms' package
# directory, that is, as an eponymous module. Thus paradigm class
# 'myparadigm' is defined in file 'paradigms/myparadigm.py'. In
# addition, by convention, a pair of statements similar to the
# following is added to 'paradigms/__init__.py' for each
# paradigm:
#
# import myparadigm
# myparadigm = myparadigm.myparadigm
#
# This makes it possible to reference a paradigm from the query
# translation script proper as simply
#
# P.myparadigm
#
# A paradigm *must* implement the following abstract method:
#
# def translateBucketAtomic (constraint, vocabularies)
#
# Should translate constraint 'constraint' to SQL and
# return a Select or Query object (see below).
# 'vocabularies' is as described above, and should not be
# modified by the paradigm. The paradigm can assume that
# 'constraint' is a simple constraint, i.e., an
# edu.ucsb.adl.middleware.Query.SimpleConstraint object,
# and that the constraint does not include a field
# constraint. The paradigm may treat constraint/paradigm
# type incompatibilities and unsupported operator errors
# as assertion violations. All other translation errors
# should be indicated by raising a QueryError exception.
#
# A paradigm *may* implement the following method. The default
# implementation returns None.
#
# def translateBucketBoolean (operator, constraints, vocabularies)
#
# Should translate a boolean combination of constraints
# against the same bucket to SQL and return a Select or
# Query object. 'operator' is the boolean operator, and
# is either "AND", "OR", or "AND NOT". 'constraints' is a
# list of constraints. 'vocabularies' is as described
# above, and should not be modified by the paradigm. The
# paradigm can assume that the operator and constraints
# are consistent: if the operator is "AND" or "OR" there
# are at least two constraints, and if the operator is
# "AND NOT" there are exactly two constraints. The
# paradigm can assume that all constraints have the same
# type (in particular, are instances of the same subclass
# of edu.ucsb.adl.middleware.Query.SimpleConstraint), and
# that no constraint includes a field constraint. The
# paradigm may treat constraint/paradigm type
# incompatibilities and unsupported operator errors as
# assertion violations. All other translation errors
# should be indicated by raising a QueryError exception.
# If the query is valid but the paradigm cannot perform
# the translation, it may return None (in which case the
# universal translator invokes the paradigm's
# 'translateBucketAtomic' method individually on each
# constraint and uses SQL operators to combine the
# resulting queries).
#
# A paradigm *may* implement the following method. The default
# implementation raises a QueryError exception containing a
# message, "bucket does not support field constraints."
#
# def translateFieldAtomic (constraint, vocabularies)
#
# Identical to 'translateBucketAtomic' above, except that
# the constraint contains a field constraint.
#
# A paradigm *may* implement the following method. The default
# implementation returns None.
#
# def translateFieldBoolean (operator, constraints, vocabularies)
#
# Identical to 'translateBucketBoolean' above, except that
# every constraint contains the same field constraint.
#
# Paradigms return SQL in the form of Query and Select objects;
# these objects and their sub-objects effectively define a
# restricted relational model. In this model, there are two types
# of tables: main tables and auxiliary tables. A main table is a
# table whose rows represent collection items and whose columns
# store metadata associated with those items; one of the columns
# must be an identifier column, that is, it must store the
# identifiers ultimately returned by a query (modulo one-time
# remapping using an ExternalIdTable object as described above).
# It is assumed that all identifier columns in all main tables are
# equi-joinable. A main table may contain multiple rows
# (including zero rows) per collection item.
#
# An auxiliary table is any table used in conjunction with a main
# table in implementing a paradigm. The relationship that ties
# an auxiliary table to its main table is called the former's
# "join condition."
#
# For example, consider a relational schema
#
# CREATE TABLE item (
# item_id INTEGER NOT NULL,
# type_id INTEGER NOT NULL,
# ...,
# PRIMARY KEY (item_id),
# FOREIGN KEY (type_id) REFERENCES type
# );
#
# CREATE TABLE type (
# type_id INTEGER NOT NULL,
# name VARCHAR(80) NOT NULL,
# PRIMARY KEY (type_id)
# );
#
# such that a constraint
#
#
# types
# is-a
# glorp
#
#
# is translated to
#
# SELECT A.item_id FROM item A, type B
# WHERE A.type_id = B.type_id AND B.name = 'glorp'
#
# In this example we would say that 'item' is a main table with
# identifier column 'item_id'. 'type' is an auxiliary table with
# join condition "item.type_id = type.type_id".
#
# We describe next the Query and Select objects in detail. Query
# objects are built from Select objects, which in turn are built
# from Expression, From, TableRef, and Cardinality objects.
# Select objects stand on their own; the Select sub-objects
# (Expression, etc.) have meaning only within the context of the
# Select object that contains them.
#
# class Query (operator, list)
#
# A set-theoretic combination of two or more Select or
# Query objects.
#
# operator
# A set operator: "UNION", "INTERSECT", or "EXCEPT".
#
# list
# A list of two or more ("UNION" or "INTERSECT") or
# exactly two ("EXCEPT") Select or Query objects.
#
# class Select (froms, expression)
#
# A single, self-contained SELECT statement. The
# statement may contain subqueries (represented as sub-
# Select objects referenced by 'expression'), but there is
# no provision for correlated subqueries.
#
# froms
# A list of one or more From objects describing the
# tables that are being INNER JOINed and SELECTed
# from. At least one From object must be a MainFrom
# object, i.e., a main table.
#
# expression
# An Expression object that describes the statement's
# WHERE clause.
#
# The semantics of a Select object are:
#
# SELECT identifier-column FROM froms WHERE
# main-from-equi-join-conditions AND
# auxiliary-from-join-conditions AND expression
#
# class From (table)
#
# A table that is SELECTed from. This is an abstract
# class; concrete subclasses are listed next.
#
# table
# A TableRef object referencing the table.
#
# class MainFrom (table, idColumn, cardinality)
#
# A main table that is SELECTed from.
#
# idColumn
# The name of the table's identifier column.
#
# cardinality
# A Cardinality object representing the table's
# cardinality with respect to the subset of its
# columns referenced in the SELECT statement (see
# below).
#
# class AuxiliaryFrom (table, joinCondition)
#
# An auxiliary table that is SELECTed from.
#
# joinCondition
# An Expression object that describes the INNER JOIN
# condition relating the auxiliary table to its main
# table.
#
# class Cardinality (value, nullable=0)
#
# The cardinality of a main table with respect to the
# subset of its columns referenced in the SELECT
# statement, expressed as a range of the possible number
# of rows. We say that the table has multiple rows per
# identifier if and only if at least one of the table's
# columns referenced in the SELECT statement is *not*
# functionally dependent on the identifier column.
# Otherwise (if every referenced column *is* functionally
# dependent on the identifier column) we say that the
# table has one row per identifier. The cardinality value
# also incorporates an indication of whether the table has
# at least one row for every collection item, or if zero
# rows are possible.
#
# value
# Either "1" (exactly one row), "1?" (zero or one
# rows), "1+" (one or more rows), or "0+" (zero or
# more rows).
#
# nullable
# A boolean indicating if any of the table's columns
# referenced in the SELECT statement may be NULL, that
# is, if the SELECT expression may possibly return the
# UNKNOWN value. The default is false.
#
# A technical point: the cardinality of a main table
# really encompasses the cardinalities of its auxiliary
# tables, if any.
#
# class TableRef (name)
#
# A reference to a table. Instances of this class are
# significant; two TableRef objects that name the same
# table are considered to be two distinct references to
# the same table (see below).
#
# name
# A table name.
#
# class Expression (list)
#
# A complete SQL expression. This class handles
# appropriate (and minimal) generation of parentheses; for
# correct processing, the 'combine' and 'negate' methods
# must be used to create boolean combinations of
# expression primitives.
#
# list
# A list of elements (strings, TableRef objects,
# Select objects, and Query objects) such that the
# concatenation of the string forms of the elements
# yields the SQL expression. A string is its own
# string form. A TableRef object's string form is the
# table's SQL alias. The string form of a Select or
# Query object is the recursive rendering of the
# object placed in parentheses. By convention, the
# constant TRUE (FALSE) is represented by the
# single-element list ["1 = 1"] (["1 = 0"]).
#
# def combine (operator, expression)
#
# Returns a new Expression object that represents a
# boolean combination of the expression with another
# Expression object.
#
# operator
# A boolean operator: "AND", "OR", or "AND NOT".
#
# expression
# The other Expression object.
#
# def negate ()
#
# Returns a new Expression object that represents the
# negation of the expression.
#
# As a special case, the expression may be empty (i.e.,
# 'list' may be the empty list). Such an expression
# cannot be a Select object's expression, but it can be
# used to combine with other Expression objects, thus
# making it easier to code loops in some cases.
#
# TableRef instances must be created and referenced with care.
# Generally speaking, all of a Select object's references to a
# table should refer to the same TableRef instance, unless a
# self-join is being expressed. For example, the following Select
# object is erroneous (and will trigger an assertion violation)
# because the Select object's expression does not reference a
# table listed in the object's 'froms' list:
#
# UT.Select(
# [UT.MainFrom(UT.TableRef("mytable"), ...)],
# UT.Expression([UT.TableRef("mytable"), ".column = ..."]))
#
# Instead, the following idiom must generally be used:
#
# table = UT.TableRef("mytable")
#
# UT.Select(
# [UT.MainFrom(table, ...)],
# UT.Expression([table, ".column = ..."]))
#
# Putting it all together, the example query above:
#
# SELECT A.item_id FROM item A, type B
# WHERE A.type_id = B.type_id AND B.name = 'glorp'
#
# would be represented as the following Select object (assuming
# there is a one-to-one relationship between rows in 'item' and
# collection items):
#
# item_table = UT.TableRef("item")
# type_table = UT.TableRef("type")
#
# UT.Select(
# [UT.MainFrom(item_table, "item_id", UT.Cardinality("1")),
# UT.AuxiliaryFrom(
# type_table,
# UT.Expression([item_table, ".type_id = ",
# type_table, ".type_id"]))],
# UT.Expression([type_table, ".name = 'glorp'"]))
#
# Paradigms that support field-level searching (specifically,
# field adaptors) should be derived from abstract class
# FieldSearchableParadigm.
#
# A final note: the programming model used in this translator and
# in the paradigms is "transparent immutable objects": objects are
# transparent in that their fields are accessed directly, and once
# objects are created they are never modified.
#
# AUTHOR
#
# Greg Janee
# gjanee@alexandria.ucsb.edu
#
# HISTORY
#
# $Log: UniversalTranslator.py,v $
# Revision 1.11 2004/02/24 22:42:21 gjanee
# Added support for relational constraints.
#
# Revision 1.10 2004/02/09 23:57:20 gjanee
# Added public constant 'extendedTextualOperators'.
#
# Revision 1.9 2004/02/03 18:17:31 gjanee
# Added the 'rewriteSetOpsAsSubqueries' option.
#
# Revision 1.8 2003/10/28 00:11:37 gjanee
# Added abstract class 'FieldSearchableParadigm'. The new policy
# on translation of field-level constraints is as follows. All
# paradigms that support field-level searching (that is, implement
# at least the 'translateFieldAtomic' method), and field adaptors
# specifically, should be derived from 'FieldSearchableParadigm'.
# A caller invoking a paradigm to translate one or more
# field-level constraints should use a field-level method (e.g.,
# 'translateFieldAtomic') if the paradigm supports field-level
# searching, and a bucket-level method (e.g.,
# 'translateBucketBoolean') otherwise. If a paradigm does not
# support field-level searching, its bucket-level methods will be
# called (per above) and it should treat all constraints as being
# bucket-level, i.e., it should simply ignore the field
# specifications in constraints.
#
# Revision 1.7 2003/01/28 18:19:31 gjanee
# Added paradigm convenience functions 'constantTrueQuery' and
# 'constantFalseQuery'.
#
# Revision 1.6 2003/01/23 21:57:20 gjanee
# Added a 'nullable' attribute to the Cardinality object to
# properly support NULLs (or more accurately, UNKNOWNs).
# Clarified and made consistent the programming model used in this
# translator and in the paradigms.
#
# Revision 1.5 2002/11/04 22:44:14 gjanee
# Adaptors rely on the convention that paradigms treat all
# constraints passed to 'translateBucketAtomic' and
# 'translateBucketBoolean' as being bucket-level, even if they are
# in fact field-level. Thus, the default implementations of these
# methods must behave likewise, and in particular they must not
# perform any assertion checks on constraint.getField().
#
# Revision 1.4 2002/10/30 04:11:52 gjanee
# Added 'protectLikeWildcards'.
#
# Revision 1.3 2002/10/15 16:55:50 gjanee
# Refactored the unification code out of function
# '_translateBooleanConstraint' and into a separate function,
# '_unify', to allow selected paradigms to access that
# functionality. Added 'set...' methods to class Cardinality.
# Added the paradigm convenience functions 'protectQuotes' and
# 'falsify'.
#
# Revision 1.2 2002/10/03 21:31:51 gjanee
# Added conventional representations for the constants TRUE and
# FALSE. To accommodate constant expressions, removed the Select
# object's assertion check that each of the tables in its 'froms'
# list must be referenced by either the object's expression or an
# auxiliary table join condition.
#
# Revision 1.1 2002/10/01 17:35:40 gjanee
# Initial revision
#
# -----------------------------------------------------------------------------
# IMPORTS
# -----------------------------------------------------------------------------
import exceptions
import string
import types
import edu.ucsb.adl.middleware
M = edu.ucsb.adl.middleware
import edu.ucsb.adl.bucket99
B99 = edu.ucsb.adl.bucket99
# -----------------------------------------------------------------------------
# ASSERTIONS
# -----------------------------------------------------------------------------
def assertType (object, type):
assert isinstance(object, type), "type error: expecting " + str(type) +\
", got " + str(object.__class__)
def assertPolytype (object, types):
if __debug__:
for type in types:
if isinstance(object, type):
return
s = ""
for type in types:
if s != "":
s += ", "
s += str(type)
assert 0, "type error: expecting one of {" + s + "}, got " +\
str(object.__class__)
def assertListElementType (list, type):
if __debug__:
for object in list:
assertType(object, type)
def assertListElementPolytype (list, types):
if __debug__:
for object in list:
assertPolytype(object, types)
def assertDictionaryElementType (dictionary, keyType, valueType):
if __debug__:
assertListElementType(dictionary.keys(), keyType)
assertListElementType(dictionary.values(), valueType)
def assertBooleanOperator (operator):
if __debug__:
assertType(operator, types.StringType)
assert operator in ["AND", "OR", "AND NOT"], "unrecognized " +\
"boolean operator: " + operator
def assertSetOperator (operator):
if __debug__:
assertType(operator, types.StringType)
assert operator in ["UNION", "INTERSECT", "EXCEPT"], "unrecognized " +\
"set operator: " + operator
def assertBooleanOperatorOperandConsistency (operator, list):
if __debug__:
if len(list) == 1:
s = ""
else:
s = "s"
assert ((operator == "AND" or operator == "OR") and len(list) >= 2) or\
(operator == "AND NOT" and len(list) == 2), "boolean operator/" +\
"operand mismatch: " + operator + " with " + str(len(list)) +\
" operand" + s
def assertSetOperatorOperandConsistency (operator, list):
if __debug__:
if len(list) == 1:
s = ""
else:
s = "s"
assert ((operator == "UNION" or operator == "INTERSECT") and\
len(list) >= 2) or (operator == "EXCEPT" and len(list) == 2),\
"set operator/operand mismatch: " + operator + " with " +\
str(len(list)) + " operand" + s
def assertListElementCommonValue (list, functor):
if __debug__:
if len(list) > 0:
value = functor(list[0])
i = 0
for object in list:
assert functor(object) == value, "list element common " +\
"value check failed: '" + str(value) + "' (element 0) " +\
"!= '" + str(functor(object)) + "' (element " + str(i) +\
")"
i += 1
def assertListElementPredicateAll (list, functor):
if __debug__:
i = 0
for object in list:
assert functor(object), "list element predicate failed: " +\
"element " + str(i)
i += 1
def assertListElementPredicateOneOrMore (list, functor):
if __debug__:
for object in list:
if functor(object):
return
assert 0, "no list element satisfies predicate"
def assertListNonempty (list):
assert len(list) > 0, "empty list"
def assertDictionaryNonempty (dictionary):
assert len(dictionary) > 0, "empty dictionary"
def assertListNoDuplicates (list):
if __debug__:
d = {}
for object in list:
assert not d.has_key(object), "duplicate list element: " +\
str(object)
d[object] = "x"
def assertSelectOrSimpleDisjunction (item):
assertPolytype(item, [Select, Query])
if isinstance(item, Query):
assert item.operator == "UNION", "query is not a disjunction"
assertListElementType(item.list, Select)
def unhandledCase ():
assert 0, "unhandled case"
def abstractMethod ():
assert 0, "abstract method not implemented"
# -----------------------------------------------------------------------------
# EXCEPTIONS
# -----------------------------------------------------------------------------
class QueryError (exceptions.Exception):
def __init__ (self, *args):
self.args = args
self.message = args[0]
# -----------------------------------------------------------------------------
# PUBLIC CONSTANTS
# -----------------------------------------------------------------------------
standardSpatialOperators = ["contains", "is-contained-in", "overlaps"]
standardTemporalOperators = ["contains", "is-contained-in", "overlaps"]
standardHierarchicalOperators = ["is-a"]
standardTextualOperators = ["contains-all-words", "contains-any-words",
"contains-phrase"]
extendedTextualOperators = standardTextualOperators +\
["equals", "matches-pattern"]
standardIdentificationOperators = ["matches"]
standardNumericOperators = ["equals", "not-equals", "less-than",
"less-than-or-equal-to", "greater-than", "greater-than-or-equal-to"]
# -----------------------------------------------------------------------------
# QUERIES
# -----------------------------------------------------------------------------
class Cardinality:
def __init__ (self, value, nullable=0):
assertType(value, types.StringType)
assert value in ["1", "1?", "1+", "0+"], "unrecognized " +\
"cardinality: " + value
assertType(nullable, types.IntType)
self.value = value
self.nullable = nullable
def isOne (self):
return self.value == "1" or self.value == "1?"
def isMany (self):
return self.value == "1+" or self.value == "0+"
def isOptional (self):
return self.value == "1?" or self.value == "0+"
def isMandatory (self):
return self.value == "1" or self.value == "1+"
def isNullable (self):
return self.nullable != 0
def setOne (self):
if self.isOptional():
return Cardinality("1?", self.nullable)
else:
return Cardinality("1", self.nullable)
def setMany (self):
if self.isOptional():
return Cardinality("0+", self.nullable)
else:
return Cardinality("1+", self.nullable)
def setOptional (self):
if self.isOne():
return Cardinality("1?", self.nullable)
else:
return Cardinality("0+", self.nullable)
def setMandatory (self):
if self.isOne():
return Cardinality("1", self.nullable)
else:
return Cardinality("1+", self.nullable)
def clone (self):
return Cardinality(self.value, self.nullable)
class TableRef:
def __init__ (self, name):
assertType(name, types.StringType)
self.name = name
# A key point in understanding the following: 'clone' methods return
# shallow copies, and therefore preserve references to TableRef
# objects; 'copy' methods return deep copies, and in particular,
# create new TableRef objects. (Maybe the names should be reversed, I
# dunno...)
class From:
def __init__ (self, table):
assertType(table, TableRef)
self.table = table
def clone (self):
return From(self.table)
def copy (self):
return From(TableRef(self.table.name))
class MainFrom (From):
def __init__ (self, table, idColumn, cardinality):
From.__init__(self, table)
assertType(idColumn, types.StringType)
assertType(cardinality, Cardinality)
self.idColumn = idColumn
self.cardinality = cardinality
def joinableIdColumn (self):
return self.idColumn
def selectableIdColumn (self):
return self.idColumn
def clone (self):
return MainFrom(self.table, self.idColumn, self.cardinality)
def copy (self):
return MainFrom(TableRef(self.table.name), self.idColumn,
self.cardinality.clone())
class ExternalIdTable (MainFrom):
def __init__ (self, table, internalIdColumn, externalIdColumn):
MainFrom.__init__(self, TableRef(table), internalIdColumn,
Cardinality("1+"))
assertType(externalIdColumn, types.StringType)
self.externalIdColumn = externalIdColumn
def selectableIdColumn (self):
return self.externalIdColumn
def copy (self):
return ExternalIdTable(self.table.name, self.idColumn,
self.externalIdColumn)
def clone (self):
e = self.copy()
e.table = self.table
e.cardinality = self.cardinality
return e
class AuxiliaryFrom (From):
def __init__ (self, table, joinCondition):
From.__init__(self, table)
assertType(joinCondition, Expression)
self.joinCondition = joinCondition
def clone (self):
return AuxiliaryFrom(self.table, self.joinCondition)
def copy (self):
# Returns only a partial copy! This method should be called
# from class Select only.
return AuxiliaryFrom(TableRef(self.table.name), self.joinCondition)
class Expression:
def __init__ (self, list):
assertType(list, types.ListType)
assertListElementPolytype(list,
[types.StringType, TableRef, Select, Query])
self.outermostOperator = "atom"
self.list = list
def isEmpty (self):
return len(self.list) == 0
def isConstant (self):
return len(self.list) == 1 and\
isinstance(self.list[0], types.StringType) and\
(self.list[0] == "1 = 1" or self.list[0] == "1 = 0")
def combine (self, operator, expression):
assertBooleanOperator(operator)
assertType(expression, Expression)
if operator == "AND NOT":
return self.combine("AND", expression.negate())
if expression.isEmpty():
return self
if self.isEmpty():
return expression
if expression.isConstant():
if operator == "AND" and expression.list[0] == "1 = 0":
return Expression(["1 = 0"])
elif operator == "OR" and expression.list[0] == "1 = 1":
return Expression(["1 = 1"])
else:
return self
if self.isConstant():
if operator == "AND" and self.list[0] == "1 = 0":
return Expression(["1 = 0"])
elif operator == "OR" and self.list[0] == "1 = 1":
return Expression(["1 = 1"])
else:
return expression
left = self.list
if self.outermostOperator != "atom" and\
self.outermostOperator != operator:
left = ["("] + left + [")"]
right = expression.list
if expression.outermostOperator != "atom" and\
expression.outermostOperator != operator:
right = ["("] + right + [")"]
e = Expression([])
e.outermostOperator = operator
e.list = left + [" " + operator + " "] + right
return e
def negate (self):
assert not self.isEmpty(), "cannot negate empty expression"
if self.isConstant():
if self.list[0] == "1 = 1":
return Expression(["1 = 0"])
else:
return Expression(["1 = 1"])
else:
e = Expression([])
e.list = ["NOT ("] + self.list + [")"]
return e
def render (self, prefixes):
assertType(prefixes, types.DictionaryType)
assertDictionaryElementType(prefixes, TableRef, types.StringType)
assert not self.isEmpty(), "cannot render empty expression"
s = ""
for item in self.list:
if isinstance(item, types.StringType):
s += item
elif isinstance(item, TableRef):
assert prefixes.has_key(item), "referenced table has no " +\
"prefix: " + item.name
s += prefixes[item]
elif isinstance(item, Select) or isinstance(item, Query):
s += "(" + item.renderGivenPrefixes(prefixes) + ")"
else:
unhandledCase()
return s
def getTableRefs (self):
tables = []
for item in self.list:
if isinstance(item, TableRef):
if item not in tables:
tables += [item]
return tables
def getTableRefsRecursive (self):
tables = self.getTableRefs()
for item in self.list:
if isinstance(item, Select) or isinstance(item, Query):
rTables = item.getTableRefsRecursive()
for table in rTables:
if table not in tables:
tables += [table]
return tables
def replaceTableRefs (self, old, new):
assertType(old, TableRef)
assertType(new, TableRef)
list = self.list + []
for i in range(len(list)):
if isinstance(list[i], TableRef) and list[i] == old:
list[i] = new
e = Expression([])
e.list = list
e.outermostOperator = self.outermostOperator
return e
def recursiveCopySupport (self):
# For use by class Select only.
newList = []
for item in self.list:
if isinstance(item, Select) or isinstance(item, Query):
newList += [item.copy()]
else:
newList += [item]
e = Expression([])
e.list = newList
e.outermostOperator = self.outermostOperator
return e
class Select:
def __init__ (self, froms, expression):
assertType(froms, types.ListType)
assertListElementType(froms, From)
assertListNonempty(froms)
if __debug__:
declaredTables = []
for f in froms:
declaredTables += [f.table]
assertListNoDuplicates(declaredTables)
assertListElementPredicateOneOrMore(froms,
lambda f: isinstance(f, MainFrom))
assertType(expression, Expression)
assert not expression.isEmpty(), "empty expression"
if __debug__:
tables = expression.getTableRefs()
for f in froms:
if isinstance(f, AuxiliaryFrom):
assert not f.joinCondition.isEmpty(),\
"empty auxiliary table join condition"
for table in f.joinCondition.getTableRefs():
if table not in tables:
tables += [table]
for table in tables:
assert table in declaredTables, "referenced table not " +\
"declared in 'froms' list: " + table.name
self.froms = froms
self.expression = expression
self.optimizerHint = None
def getTableRefsRecursive (self):
tables = self.expression.getTableRefsRecursive()
for f in self.froms:
if f.table not in tables:
tables += [f.table]
if isinstance(f, AuxiliaryFrom):
for table in f.joinCondition.getTableRefsRecursive():
if table not in tables:
tables += [table]
return tables
def render (self):
prefixes = {}
i = 0
tables = self.getTableRefsRecursive()
for table in tables:
if i < 26:
prefixes[table] = chr(ord("A")+i)
else:
prefixes[table] = "T" + str(i+1)
i += 1
return self.renderGivenPrefixes(prefixes)
def renderGivenPrefixes (self, prefixes):
assertType(prefixes, types.DictionaryType)
assertDictionaryElementType(prefixes, TableRef, types.StringType)
firstMainFrom = None
s = ""
for f in self.froms:
if firstMainFrom == None and isinstance(f, MainFrom):
firstMainFrom = f
if s != "":
s += ", "
assert prefixes.has_key(f.table), "referenced table has no " +\
"prefix: " + f.table.name
s += f.table.name + " " + prefixes[f.table]
t = "SELECT "
if self.optimizerHint != None:
t += self.optimizerHint + " "
s = t + prefixes[firstMainFrom.table] + "." +\
firstMainFrom.selectableIdColumn() + " FROM " + s + " WHERE "
e = Expression([])
for f in self.froms:
if isinstance(f, MainFrom):
if f != firstMainFrom:
e = e.combine("AND", Expression([f.table, "." +
f.joinableIdColumn() + " = ", firstMainFrom.table,
"." + firstMainFrom.joinableIdColumn()]))
elif isinstance(f, AuxiliaryFrom):
e = e.combine("AND", f.joinCondition)
else:
unhandledCase()
e = e.combine("AND", self.expression)
return s + e.render(prefixes)
def prependExternalIdTable (self, f):
assertType(f, ExternalIdTable)
s = Select([f] + self.froms, self.expression)
s.optimizerHint = self.optimizerHint
return s
def setOptimizerHint (self, optimizerHint):
assertType(optimizerHint, types.StringType)
s = Select(self.froms, self.expression)
s.optimizerHint = optimizerHint
return s
def copy (self):
newFroms = []
for f in self.froms:
newFroms += [f.copy()]
newExpression = self.expression
i = 0
while i < len(newFroms):
newExpression = newExpression.replaceTableRefs(self.froms[i].table,
newFroms[i].table)
for f in newFroms:
if isinstance(f, AuxiliaryFrom):
f.joinCondition = f.joinCondition.replaceTableRefs(
self.froms[i].table, newFroms[i].table)
i += 1
newExpression = newExpression.recursiveCopySupport()
for f in newFroms:
if isinstance(f, AuxiliaryFrom):
f.joinCondition = f.joinCondition.recursiveCopySupport()
s = Select(newFroms, newExpression)
s.optimizerHint = self.optimizerHint
return s
class Query:
def __init__ (self, operator, list):
assertSetOperator(operator)
assertType(list, types.ListType)
assertListElementPolytype(list, [Select, Query])
assertSetOperatorOperandConsistency(operator, list)
self.operator = operator
self.list = list
def getTableRefsRecursive (self):
tables = []
for item in self.list:
for table in item.getTableRefsRecursive():
if table not in tables:
tables += [table]
return tables
def render (self):
prefixes = {}
i = 0
tables = self.getTableRefsRecursive()
for table in tables:
if i < 26:
prefixes[table] = chr(ord("A")+i)
else:
prefixes[table] = "T" + str(i+1)
i += 1
return self.renderGivenPrefixes(prefixes)
def renderGivenPrefixes (self, prefixes):
assertType(prefixes, types.DictionaryType)
assertDictionaryElementType(prefixes, TableRef, types.StringType)
s = ""
for item in self.list:
if s != "":
s += " " + self.operator + " "
if self.operator == "UNION" or self.operator == "INTERSECT":
s += "ALL "
s += "(" + item.renderGivenPrefixes(prefixes) + ")"
return s
def prependExternalIdTable (self, f):
assertType(f, ExternalIdTable)
list = []
for item in self.list:
list += [item.prependExternalIdTable(f.copy())]
return Query(self.operator, list)
def setOptimizerHint (self, optimizerHint):
assertType(optimizerHint, types.StringType)
list = []
for item in self.list:
list += [item.setOptimizerHint(optimizerHint)]
return Query(self.operator, list)
def copy (self):
newList = []
for item in self.list:
newList += [item.copy()]
return Query(self.operator, newList)
# -----------------------------------------------------------------------------
# PARADIGMS
# -----------------------------------------------------------------------------
class Paradigm:
def translateBucketAtomic (self, constraint, vocabularies):
assertType(constraint, M.Query.SimpleConstraint)
# assert constraint.getField() == None
assertType(vocabularies, types.ArrayType)
assertListElementType(vocabularies, B99.Vocabulary)
abstractMethod()
def translateBucketBoolean (self, operator, constraints, vocabularies):
assertBooleanOperator(operator)
assertType(constraints, types.ListType)
assertListElementType(constraints, M.Query.SimpleConstraint)
# (Furthermore, all constraints are instances of the same
# subclass of SimpleConstraint.)
assertBooleanOperatorOperandConsistency(operator, constraints)
assertListElementCommonValue(constraints, lambda c: c.getBucket())
# assertListElementPredicateAll(constraints,
# lambda c: c.getField() == None)
assertType(vocabularies, types.ArrayType)
assertListElementType(vocabularies, B99.Vocabulary)
return None
def translateFieldAtomic (self, constraint, vocabularies):
assertType(constraint, M.Query.SimpleConstraint)
assert constraint.getField() != None
assertType(vocabularies, types.ArrayType)
assertListElementType(vocabularies, B99.Vocabulary)
raise QueryError, "bucket does not support field constraints: " +\
constraint.getBucket()
def translateFieldBoolean (self, operator, constraints, vocabularies):
assertBooleanOperator(operator)
assertType(constraints, types.ListType)
assertListElementType(constraints, M.Query.SimpleConstraint)
# (Furthermore, all constraints are instances of the same
# subclass of SimpleConstraint.)
assertBooleanOperatorOperandConsistency(operator, constraints)
assertListElementCommonValue(constraints, lambda c: c.getBucket())
assertListElementPredicateAll(constraints,
lambda c: c.getField() != None)
assertListElementCommonValue(constraints, lambda c: c.getField().uri)
assertType(vocabularies, types.ArrayType)
assertListElementType(vocabularies, B99.Vocabulary)
return None
class FieldSearchableParadigm (Paradigm):
pass
# -----------------------------------------------------------------------------
# BUCKETS & BUCKET TYPES
# -----------------------------------------------------------------------------
_bucketTypeConstraintClassMapping = {
"spatial" : M.Query.SpatialConstraint,
"temporal" : M.Query.TemporalConstraint,
"hierarchical" : M.Query.HierarchicalConstraint,
"textual" : M.Query.TextualConstraint,
"identification" : M.Query.IdentificationConstraint,
"numeric" : M.Query.NumericConstraint,
"relational" : M.Query.RelationalConstraint
}
_constraintClassBucketTypeMapping = {
M.Query.SpatialConstraint : "spatial",
M.Query.TemporalConstraint : "temporal",
M.Query.HierarchicalConstraint : "hierarchical",
M.Query.TextualConstraint : "textual",
M.Query.IdentificationConstraint : "identification",
M.Query.NumericConstraint : "numeric",
M.Query.RelationalConstraint : "relational"
}
def _constraintType (constraint):
assertType(constraint, M.Query.SimpleConstraint)
for type in _constraintClassBucketTypeMapping.keys():
if isinstance(constraint, type):
return _constraintClassBucketTypeMapping[type]
assert 0, "unrecognized constraint type: " +\
constraint.getClass().getName()
class Bucket:
def __init__ (self, type, operators, paradigm):
assertType(type, types.StringType)
assert type in _bucketTypeConstraintClassMapping.keys(),\
"unrecognized bucket type: " + type
assertType(operators, types.ListType)
assertListElementType(operators, types.StringType)
assertType(paradigm, Paradigm)
self.type = type
self.operators = operators
self.paradigm = paradigm
# -----------------------------------------------------------------------------
# QUERY TRANSLATION
# -----------------------------------------------------------------------------
def _translateAtomicConstraint (constraint, buckets, vocabularies):
assertType(constraint, M.Query.SimpleConstraint)
assertType(buckets, types.DictionaryType)
assertDictionaryElementType(buckets, types.StringType, Bucket)
assertType(vocabularies, types.ArrayType)
assertListElementType(vocabularies, B99.Vocabulary)
if not buckets.has_key(constraint.getBucket()):
raise QueryError, "unsupported bucket: " + constraint.getBucket()
bucket = buckets[constraint.getBucket()]
if _constraintType(constraint) != bucket.type:
raise QueryError, "unsupported constraint type for bucket '" +\
constraint.getBucket() + "': " + _constraintType(constraint)
if constraint.getOperator() not in bucket.operators:
raise QueryError, "unsupported operator in constraint on bucket '" +\
constraint.getBucket() + "': " + constraint.getOperator()
if constraint.getField() != None:
query = bucket.paradigm.translateFieldAtomic(constraint, vocabularies)
else:
query = bucket.paradigm.translateBucketAtomic(constraint, vocabularies)
assertPolytype(query, [Select, Query])
return query
def _translateBooleanConstraintCommonBucketAndField (operator, constraints,
buckets, vocabularies):
assertBooleanOperator(operator)
assertType(constraints, types.ListType)
assertListElementType(constraints, M.Query.SimpleConstraint)
assertBooleanOperatorOperandConsistency(operator, constraints)
assertListElementCommonValue(constraints, lambda c: c.getBucket())
assertListElementCommonValue(constraints, lambda c: _constraintType(c))
if __debug__:
if constraints[0].getField() != None:
assertListElementPredicateAll(constraints,
lambda c: c.getField() != None)
assertListElementCommonValue(constraints,
lambda c: c.getField().uri)
else:
assertListElementPredicateAll(constraints,
lambda c: c.getField() == None)
assertType(buckets, types.DictionaryType)
assertDictionaryElementType(buckets, types.StringType, Bucket)
assertType(vocabularies, types.ArrayType)
assertListElementType(vocabularies, B99.Vocabulary)
if not buckets.has_key(constraints[0].getBucket()):
raise QueryError, "unsupported bucket: " + constraints[0].getBucket()
bucket = buckets[constraints[0].getBucket()]
if _constraintType(constraints[0]) != bucket.type:
raise QueryError, "unsupported constraint type for bucket '" +\
constraints[0].getBucket() + "': " +\
_constraintType(constraints[0])
for constraint in constraints:
if constraint.getOperator() not in bucket.operators:
raise QueryError, "unsupported operator in constraint on " +\
"bucket '" + constraint.getBucket() + "': " +\
constraint.getOperator()
if constraints[0].getField() != None:
query = bucket.paradigm.translateFieldBoolean(operator, constraints,
vocabularies)
else:
query = bucket.paradigm.translateBucketBoolean(operator, constraints,
vocabularies)
assertPolytype(query, [Select, Query, types.NoneType])
return query
_booleanOperatorMapping = {
M.Query.BooleanConstraint.AND : "AND",
M.Query.BooleanConstraint.OR : "OR",
M.Query.BooleanConstraint.AND_NOT : "AND NOT"
}
def _translateBooleanConstraint (constraint, buckets, vocabularies):
assertType(constraint, M.Query.BooleanConstraint)
assertType(buckets, types.DictionaryType)
assertDictionaryElementType(buckets, types.StringType, Bucket)
assertType(vocabularies, types.ArrayType)
assertListElementType(vocabularies, B99.Vocabulary)
assert constraint.getOperator() in _booleanOperatorMapping.keys(),\
"unrecognized boolean operator code: " + str(constraint.getOperator())
operator = _booleanOperatorMapping[constraint.getOperator()]
assertType(constraint.getOperands(), types.ArrayType)
assertListElementType(constraint.getOperands(), M.Query.Constraint)
operands = list(constraint.getOperands())
assertBooleanOperatorOperandConsistency(operator, operands)
queries = []
while len(operands) > 0:
operand = operands.pop(0)
if isinstance(operand, M.Query.BooleanConstraint):
queries += [_translateBooleanConstraint(operand, buckets,
vocabularies)]
else:
inSet = [operand]
outSet = []
for o in operands:
if isinstance(o, M.Query.SimpleConstraint) and\
o.getBucket() == operand.getBucket() and\
((o.getField() != None and operand.getField() != None and\
o.getField().uri == operand.getField().uri) or\
(o.getField() == None and\
operand.getField() == None)) and\
_constraintType(o) == _constraintType(operand):
inSet += [o]
else:
outSet += [o]
if len(inSet) >= 2:
query = _translateBooleanConstraintCommonBucketAndField(
operator, inSet, buckets, vocabularies)
if query != None:
queries += [query]
operands = outSet
else:
queries += [_translateAtomicConstraint(operand,
buckets, vocabularies)]
else:
queries += [_translateAtomicConstraint(operand, buckets,
vocabularies)]
return _unify(operator, queries)
def _translateConstraint (constraint, buckets, vocabularies):
assertType(constraint, M.Query.Constraint)
assertType(buckets, types.DictionaryType)
assertDictionaryElementType(buckets, types.StringType, Bucket)
assertType(vocabularies, types.ArrayType)
assertListElementType(vocabularies, B99.Vocabulary)
if isinstance(constraint, M.Query.BooleanConstraint):
query = _translateBooleanConstraint(constraint, buckets, vocabularies)
else:
query = _translateAtomicConstraint(constraint, buckets, vocabularies)
return query
class Translator:
def __init__ (self, buckets, externalIdTable=None, optimizerHint=None,
rewriteSetOpsAsSubqueries=0):
assertType(buckets, types.DictionaryType)
assertDictionaryElementType(buckets, types.StringType, Bucket)
assertPolytype(externalIdTable, [ExternalIdTable, types.NoneType])
assertPolytype(optimizerHint, [types.StringType, types.NoneType])
assertType(rewriteSetOpsAsSubqueries, types.IntType)
self.buckets = buckets
self.externalIdTable = externalIdTable
self.optimizerHint = optimizerHint
self.rewriteSetOpsAsSubqueries = rewriteSetOpsAsSubqueries
def translate (self, constraint, vocabularies):
assertType(constraint, M.Query.Constraint)
assertType(vocabularies, types.ArrayType)
assertListElementType(vocabularies, B99.Vocabulary)
try:
query = _translateConstraint(constraint, self.buckets,
vocabularies)
except QueryError, e:
return M.UnsupportedException(
"[UniversalTranslator.py] translation error: " + e.message)
if self.rewriteSetOpsAsSubqueries:
query = _removeSetOperators(query)
if self.externalIdTable != None:
query = query.prependExternalIdTable(self.externalIdTable)
if self.optimizerHint != None:
query = query.setOptimizerHint(self.optimizerHint)
return query.render()
# -----------------------------------------------------------------------------
# QUERY REWRITING
# -----------------------------------------------------------------------------
def _removeSetOperators (query):
assertPolytype(query, [Select, Query])
if isinstance(query, Select):
return query
else:
newList = []
for item in query.list:
newList += [_removeSetOperators(item)]
while len(newList) >= 2:
newList[1] = _rewriteSetOperatorAsSubquery(newList[0],
query.operator, newList[1])
newList = newList[1:]
return newList[0]
def _rewriteSetOperatorAsSubquery (left, operator, right):
assertSelectOrSimpleDisjunction(left)
assertSetOperator(operator)
assertSelectOrSimpleDisjunction(right)
# Rewrites two queries combined with a set operator as a single
# query. Generally this is done by adding an AND IN (...) or AND
# NOT IN (...) subquery clause to one of the queries. Both the
# input queries to this function, and the returned query, may be
# either a single Select or a simple disjunction of Selects.
if operator != "EXCEPT":
if isinstance(left, Select) and isinstance(right, Query):
x = left
left = right
right = x
if operator == "UNION":
if isinstance(left, Select):
return Query("UNION", [left, right])
elif isinstance(right, Select):
return Query("UNION", left.list + [right])
else:
return Query("UNION", left.list + right.list)
elif operator == "INTERSECT":
if isinstance(left, Select):
return _combineSelects(left, right, 0)
elif isinstance(right, Select):
newList = []
for q in left.list:
newList += [_combineSelects(q, right.copy(), 0)]
return Query("UNION", newList)
else:
newList = []
for q in left.list:
for r in right.list:
newList += [_combineSelects(q.copy(), r.copy(), 0)]
return Query("UNION", newList)
elif operator == "EXCEPT":
if isinstance(left, Select):
if isinstance(right, Select):
return _combineSelects(left, right, 1)
else:
for q in right.list:
left = _combineSelects(left, q, 1)
return left
else:
if isinstance(right, Select):
newList = []
for q in left.list:
newList += [_combineSelects(q, right.copy(), 1)]
return Query("UNION", newList)
else:
newList = []
for q in left.list:
q = q.copy()
for r in right.list:
q = _combineSelects(q, r.copy(), 1)
newList += [q]
return Query("UNION", newList)
else:
unhandledCase()
def _combineSelects (outer, inner, negate):
assertType(outer, Select)
assertType(inner, Select)
assertType(negate, types.IntType)
mainFrom = None
for f in outer.froms:
if isinstance(f, MainFrom):
mainFrom = f
break
assert mainFrom != None, "no main table"
if negate:
negateClause = " NOT"
else:
negateClause = ""
return Select(
outer.froms,
outer.expression.combine("AND", Expression([
mainFrom.table, "." + mainFrom.joinableIdColumn() + negateClause +
" IN ", inner])))
# -----------------------------------------------------------------------------
# UNIFICATION
# -----------------------------------------------------------------------------
_booleanOperatorSetOperatorMapping = {
"AND" : "INTERSECT",
"OR" : "UNION",
"AND NOT" : "EXCEPT"
}
class _EscapeHatch (exceptions.Exception):
pass
def _unify (operator, queries):
# Note: this function, nominally internal to this module, is also
# called by the Adaptor_Union and Adaptor_Concatenation paradigms.
assertBooleanOperator(operator)
assertType(queries, types.ListType)
assertListElementPolytype(queries, [Select, Query])
queries = queries + []
didSomething = 1
while didSomething:
didSomething = 0
try:
for i in range(len(queries)):
if isinstance(queries[i], Select):
for j in range(i+1, len(queries)):
if isinstance(queries[j], Select):
if operator == "AND":
combinedQuery = _unifyBooleanAnd(
queries[i], queries[j])
elif operator == "OR":
combinedQuery = _unifyBooleanOr(
queries[i], queries[j])
elif operator == "AND NOT":
combinedQuery = _unifyBooleanAndNot(
queries[i], queries[j])
else:
unhandledCase()
if combinedQuery != None:
del queries[j]
queries[i] = combinedQuery
raise _EscapeHatch
except _EscapeHatch:
didSomething = 1
if len(queries) >= 2:
operator = _booleanOperatorSetOperatorMapping[operator]
newQueries = []
for query in queries:
if isinstance(query, Select) or operator == "EXCEPT":
newQueries += [query]
else:
if query.operator == operator:
for item in query.list:
newQueries += [item]
else:
newQueries += [query]
return Query(operator, newQueries)
else:
return queries[0]
def _unifyMultipleTableReferences (select):
assertType(select, Select)
# If a main table is referenced multiple times in a SELECT
# statement, each of its references T for which
# T.cardinality.isOne() can be unified into a single reference.
# Furthermore, a reference T1 for which T1.cardinality.isOne() can
# be unified into a reference T2 (to the same table, of course)
# for which T2.cardinality.isMany(). Thus if there are M isOne()
# references and N isMany() references to a given table T in the
# original SELECT statement, M >= 0 and N >= 0 and MAX(M, N) > 0,
# then after calling this function there will be MAX(N, 1)
# references to T.
froms = select.froms + []
expression = select.expression
i = 0
while i < len(froms):
if isinstance(froms[i], MainFrom):
j = i + 1
while j < len(froms):
if isinstance(froms[j], MainFrom) and\
froms[j].table.name == froms[i].table.name and\
froms[j].idColumn == froms[i].idColumn:
if froms[i].cardinality.isOne() and\
froms[j].cardinality.isMany():
f = froms[i]
froms[i] = froms[j]
froms[j] = f
if froms[j].cardinality.isOne():
if froms[j].cardinality.isNullable() and\
not froms[i].cardinality.isNullable():
froms[i] = froms[i].clone()
froms[i].cardinality = froms[i].cardinality.clone()
froms[i].cardinality.nullable = 1
expression = expression.replaceTableRefs(
froms[j].table, froms[i].table)
for k in range(len(froms)):
if isinstance(froms[k], AuxiliaryFrom):
froms[k] = AuxiliaryFrom(froms[k].table,
froms[k].joinCondition.replaceTableRefs(
froms[j].table, froms[i].table))
del froms[j]
else:
j += 1
else:
j += 1
i += 1
return Select(froms, expression)
def _unifyBooleanAnd (select1, select2):
assertType(select1, Select)
assertType(select2, Select)
# Consider two SELECT statements S1 and S2, each querying at least
# one main table, and the expression S1 INTERSECT S2. The
# statements can always be INNER JOINed on main tables into a
# single SELECT.
froms = select1.froms + select2.froms
expression = select1.expression.combine("AND", select2.expression)
return _unifyMultipleTableReferences(Select(froms, expression))
def _unifyBooleanOr (select1, select2):
assertType(select1, Select)
assertType(select2, Select)
# Consider two SELECT statements S1 and S2, each querying at least
# one main table, and the expression S1 UNION S2. The statements
# can be INNER JOINed on main tables only if all main table
# references T have T.cardinality.isMandatory().
for f in (select1.froms + select2.froms):
if isinstance(f, MainFrom):
if f.cardinality.isOptional():
return None
# Consider INNER JOINing two SELECT statements such that the
# combined WHERE clause is the OR of the constituent WHERE
# clauses. Suppose further that the SELECT statements have been
# run through _unifyMultipleTableReferences, so that the remaining
# main table references are separately non-unifiable. Then for
# each table that is referenced by both statements, there are
# three cases of unification: 1) if each statement has a (single)
# reference T for which T.cardinality.isOne(), then the two
# references may be unified; 2) if one statement has an isOne()
# reference and the other statement has one or more isMany()
# references, then the isOne() reference may be unified into one
# of the isMany() references; and 3) (this case is unique to the
# OR operator) if one statement has M isMany() references and the
# other statement has N isMany() references, M > 0 and N > 0, then
# the references may be pairwise unified into MIN(M, N)
# references.
select1 = _unifyMultipleTableReferences(select1)
select2 = _unifyMultipleTableReferences(select2)
expression = select1.expression.combine("OR", select2.expression)
leftSet = []
for f in select1.froms:
leftSet += [f.clone()]
rightSet = []
for f in select2.froms:
rightSet += [f.clone()]
finalSet = []
replacements = []
while len(leftSet) > 0:
if isinstance(leftSet[0], MainFrom):
lSet = [leftSet.pop(0)]
outSet = []
for f in leftSet:
if isinstance(f, MainFrom) and\
f.table.name == lSet[0].table.name and\
f.idColumn == lSet[0].idColumn:
lSet += [f]
else:
outSet += [f]
leftSet = outSet
rSet = []
outSet = []
for f in rightSet:
if isinstance(f, MainFrom) and\
f.table.name == lSet[0].table.name and\
f.idColumn == lSet[0].idColumn:
rSet += [f]
else:
outSet += [f]
rightSet = outSet
if len(lSet) >= len(rSet):
inSet = lSet
outSet = rSet
else:
inSet = rSet
outSet = lSet
finalSet += inSet
for i in range(len(outSet)):
replacements += [(outSet[i], inSet[i])]
if outSet[i].cardinality.isMany() and\
inSet[i].cardinality.isOne():
inSet[i].cardinality = inSet[i].cardinality.setMany()
if outSet[i].cardinality.isNullable() and\
not inSet[i].cardinality.isNullable():
inSet[i].cardinality = inSet[i].cardinality.clone()
inSet[i].cardinality.nullable = 1
else:
finalSet += [leftSet.pop(0)]
finalSet += rightSet
for (old, new) in replacements:
expression = expression.replaceTableRefs(old.table, new.table)
for f in finalSet:
if isinstance(f, AuxiliaryFrom):
f.joinCondition =\
f.joinCondition.replaceTableRefs(old.table, new.table)
return Select(finalSet, expression)
def _unifyBooleanAndNot (select1, select2):
assertType(select1, Select)
assertType(select2, Select)
# Consider two SELECT statements S1 and S2, each querying at least
# one main table, and the expression S1 EXCEPT S2. The statements
# can be INNER JOINed on main tables into a single SELECT, and
# main table references can be unified, if and only if no main
# table reference T in S2 has T.cardinality.isOptional() or
# T.cardinality.isMany() or T.cardinality.isNullable().
for f in select2.froms:
if isinstance(f, MainFrom):
if f.cardinality.isOptional() or f.cardinality.isMany() or\
f.cardinality.isNullable():
return None
froms = select1.froms + select2.froms
expression = select1.expression.combine("AND NOT", select2.expression)
return _unifyMultipleTableReferences(Select(froms, expression))
# -----------------------------------------------------------------------------
# PARADIGM CONVENIENCES
# -----------------------------------------------------------------------------
def protectQuotes (text):
assertType(text, types.StringType)
return string.replace(text, "'", "''")
def protectLikeWildcards (text):
assertType(text, types.StringType)
return\
string.replace(
string.replace(
string.replace(
string.replace(
text,
"'", "''"),
"\\", "\\\\"),
"%", "\%"),
"_", "\_")
def constantTrueQuery (table, idColumn, nullableColumn, cardinality):
# Returns a constant TRUE by selecting all rows of table 'table'.
# If the cardinality of the table has isNullable(), then only
# those rows for which column 'nullableColumn' is not NULL are
# selected.
assertType(table, types.StringType)
assertType(idColumn, types.StringType)
assertType(nullableColumn, types.StringType)
assertType(cardinality, Cardinality)
table = TableRef(table)
if cardinality.isNullable():
return Select(
[MainFrom(table, idColumn, Cardinality(cardinality.value))],
Expression([table, "." + nullableColumn + " IS NOT NULL"]))
else:
return Select(
[MainFrom(table, idColumn, cardinality.setOne())],
Expression(["1 = 1"]))
def constantFalseQuery (table, idColumn, cardinality):
# Returns a constant FALSE by selecting no rows of a given table.
assertType(table, types.StringType)
assertType(idColumn, types.StringType)
assertType(cardinality, Cardinality)
return Select(
[MainFrom(TableRef(table), idColumn,
Cardinality(cardinality.value).setOne())],
Expression(["1 = 0"]))
def falsify (item):
# Accepts a Select or Query object and returns a new Select object
# that returns zero rows. The algorithm below is greatly
# complicated by the fact that we try to use a table reference
# that has cardinality isMandatory(), since such references have
# better joinability properties.
assertPolytype(item, [Select, Query])
mainFrom = None
list = [item]
while len(list) > 0 and\
(mainFrom == None or mainFrom.cardinality.isOptional()):
item = list.pop(0)
if isinstance(item, Select):
list = item.froms + list
elif isinstance(item, Query):
list = item.list + list
else:
if isinstance(item, MainFrom):
mainFrom = item
assert mainFrom != None, "no main table"
return constantFalseQuery(mainFrom.table.name, mainFrom.idColumn,
mainFrom.cardinality)