############################################################################### ## ## ## ALEXANDRIA DIGITAL LIBRARY ## ## University of California at Santa Barbara ## ## ## ## ------------------------------------------------------------------------- ## ## ## ## Copyright (c) 2004 by the Regents of the University of California ## ## All rights reserved ## ## ## ## Redistribution and use in source and binary forms, with or without ## ## modification, are permitted provided that the following conditions are ## ## met: ## ## ## ## 1. Redistributions of source code must retain the above copyright ## ## notice, this list of conditions, and the following disclaimer. ## ## ## ## 2. Redistributions in binary form must reproduce the above copyright ## ## notice, this list of conditions, and the following disclaimer in ## ## the documentation and/or other materials provided with the ## ## distribution. ## ## ## ## 3. All advertising materials mentioning features or use of this ## ## software must display the following acknowledgement: This product ## ## includes software developed by the Alexandria Digital Library, ## ## University of California at Santa Barbara, and its contributors. ## ## ## ## 4. Neither the name of the University nor the names of its ## ## contributors may be used to endorse or promote products derived ## ## from this software without specific prior written permission. ## ## ## ## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY ## ## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ## ## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE ## ## DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ## ## ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ## ## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ## ## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ## ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ## ## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ## ## ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ## ## POSSIBILITY OF SUCH DAMAGE. ## ## ## ############################################################################### # $Header: /export/home/gjanee/bucket99/RCS/UniversalTranslator.py,v 1.11 2004/02/24 22:42:21 gjanee Exp $ # SYNOPSIS # # class Translator (buckets, externalIdTable=None, optimizerHint=None, # rewriteSetOpsAsSubqueries=0) # # An object that holds the information needed to translate # constraints. It is initialized with: # # buckets # A dictionary that maps the names of supported buckets # (e.g., "geographic-locations") to Bucket objects. # # externalIdTable # Optionally, an ExternalIdTable object that describes a # many-to-one table that maps internal identifiers (the # identifiers used to join tables internally) to external # identifiers (the identifiers that are ultimately # returned). # # optimizerHint # Optionally, an optimizer hint, e.g., # "/*+ FIRST_ROWS(1000) */". The hint is placed # immediately after the SELECT keyword in each outermost # SELECT clause. # # rewriteSetOpsAsSubqueries # A boolean that indicates if INTERSECT and EXCEPT set # operations should be rewritten using subqueries. The # default is false. If true, the returned query will be # either a single SELECT statement or a simple UNION of # two or more SELECT statements (and therefore in # disjunctive normal form). # # def Translator.translate (constraint, vocabularies) # # Translates a constraint. Returns either an SQL query as a # string or an edu.ucsb.adl.middleware.MiddlewareException # object. # # constraint # A constraint (specifically, an # edu.ucsb.adl.middleware.Query.Constraint object) to be # translated to SQL. # # vocabularies # The vocabularies (specifically, an array of # edu.ucsb.adl.bucket99.Vocabulary objects) to be used in # the translation. # # DESCRIPTION # # Universal query translator. This translator is intended to be # referenced from the query translation script proper in the # stylized manner shown below: # # import UniversalTranslator # UT = UniversalTranslator # # import paradigms # P = paradigms # # buckets = { ... } # # translator = UT.Translator(buckets) # # def translate (): # return translator.translate(constraint, vocabularies) # # 'buckets' is a dictionary that maps the names of supported # buckets to Bucket objects. A Bucket object describes key # characteristics and the translation mechanism of a bucket. It # is initialized with three arguments: a bucket type (which must # be "spatial", "temporal", "hierarchical", "textual", # "identification", "numeric", or "relational"); a list of the # operators the bucket supports; and a Paradigm object that # implements query translation on behalf of the bucket. For # example: # # buckets = { # "resolution" : # UT.Bucket( # "numeric", # UT.standardNumericOperators, # P.Numeric_Straight("mytable", "id", # "res_meters", {}, UT.Cardinality("1"))) # } # # The above example describes a numeric bucket named "resolution" # that supports the standard numeric operators (see below). The # bucket is implemented by the Numeric_Straight paradigm, which # translates numeric constraints to straight SQL numeric # column/value comparisons. The arguments to the paradigm are # specific to the paradigm, and are described in the paradigm's # documentation. The ultimate effect of the particular # declaration above is that a constraint # # # resolution # less-than # 3.5 # # # is translated to # # SELECT A.id FROM mytable A WHERE A.res_meters < 3.5 # # There are many paradigms; they are located in the 'paradigms' # directory. # # For convenience, the universal translator defines the following # lists of standard operators: # # UT.standardSpatialOperators = # ["contains", "is-contained-in", "overlaps"] # # UT.standardTemporalOperators = # ["contains", "is-contained-in", "overlaps"] # # UT.standardHierarchicalOperators = # ["is-a"] # # UT.standardTextualOperators = # ["contains-all-words", "contains-any-words", # "contains-phrase"] # # UT.extendedTextualOperators = # UT.standardTextualOperators + ["equals", "matches-pattern"] # # UT.standardIdentificationOperators = # ["matches"] # # UT.standardNumericOperators = # ["equals", "not-equals", "less-than", # "less-than-or-equal-to", "greater-than", # "greater-than-or-equal-to"] # # (There are no standard relational operators; instead, the # relations themselves are treated as operators.) # # As noted above, the Translator object can optionally be # initialized with an ExternalIdTable object that describes a # table that maps internal identifiers to external identifiers. # The object has the structure # # ExternalIdTable (table, internalIdColumn, externalIdColumn) # # For example, a configuration # # translator = UT.Translator(buckets, # UT.ExternalIdTable("id_map", "int_id", "ext_id")) # # will cause the translator to convert the nominal query # # SELECT A.id FROM mytable A WHERE A.res_meters < 3.5 # # to # # SELECT A.ext_id FROM id_map A, mytable B WHERE # A.int_id = B.id AND B.res_meters < 3.5 # # FIELD ADAPTORS # # A field adaptor is a special kind of paradigm that adds support # for field-level searching to a paradigm (or set of paradigms) # that doesn't otherwise support it. Adaptors are located in the # 'paradigms' directory. # # PARADIGMS # # The remainder of this documentation describes the universal # translator's paradigm interface. # # A paradigm is a subclass of the Paradigm abstract class; it # should implement a particular technique of constraint # translation. By convention, each paradigm is stored in a # separate file by the same name in the 'paradigms' package # directory, that is, as an eponymous module. Thus paradigm class # 'myparadigm' is defined in file 'paradigms/myparadigm.py'. In # addition, by convention, a pair of statements similar to the # following is added to 'paradigms/__init__.py' for each # paradigm: # # import myparadigm # myparadigm = myparadigm.myparadigm # # This makes it possible to reference a paradigm from the query # translation script proper as simply # # P.myparadigm # # A paradigm *must* implement the following abstract method: # # def translateBucketAtomic (constraint, vocabularies) # # Should translate constraint 'constraint' to SQL and # return a Select or Query object (see below). # 'vocabularies' is as described above, and should not be # modified by the paradigm. The paradigm can assume that # 'constraint' is a simple constraint, i.e., an # edu.ucsb.adl.middleware.Query.SimpleConstraint object, # and that the constraint does not include a field # constraint. The paradigm may treat constraint/paradigm # type incompatibilities and unsupported operator errors # as assertion violations. All other translation errors # should be indicated by raising a QueryError exception. # # A paradigm *may* implement the following method. The default # implementation returns None. # # def translateBucketBoolean (operator, constraints, vocabularies) # # Should translate a boolean combination of constraints # against the same bucket to SQL and return a Select or # Query object. 'operator' is the boolean operator, and # is either "AND", "OR", or "AND NOT". 'constraints' is a # list of constraints. 'vocabularies' is as described # above, and should not be modified by the paradigm. The # paradigm can assume that the operator and constraints # are consistent: if the operator is "AND" or "OR" there # are at least two constraints, and if the operator is # "AND NOT" there are exactly two constraints. The # paradigm can assume that all constraints have the same # type (in particular, are instances of the same subclass # of edu.ucsb.adl.middleware.Query.SimpleConstraint), and # that no constraint includes a field constraint. The # paradigm may treat constraint/paradigm type # incompatibilities and unsupported operator errors as # assertion violations. All other translation errors # should be indicated by raising a QueryError exception. # If the query is valid but the paradigm cannot perform # the translation, it may return None (in which case the # universal translator invokes the paradigm's # 'translateBucketAtomic' method individually on each # constraint and uses SQL operators to combine the # resulting queries). # # A paradigm *may* implement the following method. The default # implementation raises a QueryError exception containing a # message, "bucket does not support field constraints." # # def translateFieldAtomic (constraint, vocabularies) # # Identical to 'translateBucketAtomic' above, except that # the constraint contains a field constraint. # # A paradigm *may* implement the following method. The default # implementation returns None. # # def translateFieldBoolean (operator, constraints, vocabularies) # # Identical to 'translateBucketBoolean' above, except that # every constraint contains the same field constraint. # # Paradigms return SQL in the form of Query and Select objects; # these objects and their sub-objects effectively define a # restricted relational model. In this model, there are two types # of tables: main tables and auxiliary tables. A main table is a # table whose rows represent collection items and whose columns # store metadata associated with those items; one of the columns # must be an identifier column, that is, it must store the # identifiers ultimately returned by a query (modulo one-time # remapping using an ExternalIdTable object as described above). # It is assumed that all identifier columns in all main tables are # equi-joinable. A main table may contain multiple rows # (including zero rows) per collection item. # # An auxiliary table is any table used in conjunction with a main # table in implementing a paradigm. The relationship that ties # an auxiliary table to its main table is called the former's # "join condition." # # For example, consider a relational schema # # CREATE TABLE item ( # item_id INTEGER NOT NULL, # type_id INTEGER NOT NULL, # ..., # PRIMARY KEY (item_id), # FOREIGN KEY (type_id) REFERENCES type # ); # # CREATE TABLE type ( # type_id INTEGER NOT NULL, # name VARCHAR(80) NOT NULL, # PRIMARY KEY (type_id) # ); # # such that a constraint # # # types # is-a # glorp # # # is translated to # # SELECT A.item_id FROM item A, type B # WHERE A.type_id = B.type_id AND B.name = 'glorp' # # In this example we would say that 'item' is a main table with # identifier column 'item_id'. 'type' is an auxiliary table with # join condition "item.type_id = type.type_id". # # We describe next the Query and Select objects in detail. Query # objects are built from Select objects, which in turn are built # from Expression, From, TableRef, and Cardinality objects. # Select objects stand on their own; the Select sub-objects # (Expression, etc.) have meaning only within the context of the # Select object that contains them. # # class Query (operator, list) # # A set-theoretic combination of two or more Select or # Query objects. # # operator # A set operator: "UNION", "INTERSECT", or "EXCEPT". # # list # A list of two or more ("UNION" or "INTERSECT") or # exactly two ("EXCEPT") Select or Query objects. # # class Select (froms, expression) # # A single, self-contained SELECT statement. The # statement may contain subqueries (represented as sub- # Select objects referenced by 'expression'), but there is # no provision for correlated subqueries. # # froms # A list of one or more From objects describing the # tables that are being INNER JOINed and SELECTed # from. At least one From object must be a MainFrom # object, i.e., a main table. # # expression # An Expression object that describes the statement's # WHERE clause. # # The semantics of a Select object are: # # SELECT identifier-column FROM froms WHERE # main-from-equi-join-conditions AND # auxiliary-from-join-conditions AND expression # # class From (table) # # A table that is SELECTed from. This is an abstract # class; concrete subclasses are listed next. # # table # A TableRef object referencing the table. # # class MainFrom (table, idColumn, cardinality) # # A main table that is SELECTed from. # # idColumn # The name of the table's identifier column. # # cardinality # A Cardinality object representing the table's # cardinality with respect to the subset of its # columns referenced in the SELECT statement (see # below). # # class AuxiliaryFrom (table, joinCondition) # # An auxiliary table that is SELECTed from. # # joinCondition # An Expression object that describes the INNER JOIN # condition relating the auxiliary table to its main # table. # # class Cardinality (value, nullable=0) # # The cardinality of a main table with respect to the # subset of its columns referenced in the SELECT # statement, expressed as a range of the possible number # of rows. We say that the table has multiple rows per # identifier if and only if at least one of the table's # columns referenced in the SELECT statement is *not* # functionally dependent on the identifier column. # Otherwise (if every referenced column *is* functionally # dependent on the identifier column) we say that the # table has one row per identifier. The cardinality value # also incorporates an indication of whether the table has # at least one row for every collection item, or if zero # rows are possible. # # value # Either "1" (exactly one row), "1?" (zero or one # rows), "1+" (one or more rows), or "0+" (zero or # more rows). # # nullable # A boolean indicating if any of the table's columns # referenced in the SELECT statement may be NULL, that # is, if the SELECT expression may possibly return the # UNKNOWN value. The default is false. # # A technical point: the cardinality of a main table # really encompasses the cardinalities of its auxiliary # tables, if any. # # class TableRef (name) # # A reference to a table. Instances of this class are # significant; two TableRef objects that name the same # table are considered to be two distinct references to # the same table (see below). # # name # A table name. # # class Expression (list) # # A complete SQL expression. This class handles # appropriate (and minimal) generation of parentheses; for # correct processing, the 'combine' and 'negate' methods # must be used to create boolean combinations of # expression primitives. # # list # A list of elements (strings, TableRef objects, # Select objects, and Query objects) such that the # concatenation of the string forms of the elements # yields the SQL expression. A string is its own # string form. A TableRef object's string form is the # table's SQL alias. The string form of a Select or # Query object is the recursive rendering of the # object placed in parentheses. By convention, the # constant TRUE (FALSE) is represented by the # single-element list ["1 = 1"] (["1 = 0"]). # # def combine (operator, expression) # # Returns a new Expression object that represents a # boolean combination of the expression with another # Expression object. # # operator # A boolean operator: "AND", "OR", or "AND NOT". # # expression # The other Expression object. # # def negate () # # Returns a new Expression object that represents the # negation of the expression. # # As a special case, the expression may be empty (i.e., # 'list' may be the empty list). Such an expression # cannot be a Select object's expression, but it can be # used to combine with other Expression objects, thus # making it easier to code loops in some cases. # # TableRef instances must be created and referenced with care. # Generally speaking, all of a Select object's references to a # table should refer to the same TableRef instance, unless a # self-join is being expressed. For example, the following Select # object is erroneous (and will trigger an assertion violation) # because the Select object's expression does not reference a # table listed in the object's 'froms' list: # # UT.Select( # [UT.MainFrom(UT.TableRef("mytable"), ...)], # UT.Expression([UT.TableRef("mytable"), ".column = ..."])) # # Instead, the following idiom must generally be used: # # table = UT.TableRef("mytable") # # UT.Select( # [UT.MainFrom(table, ...)], # UT.Expression([table, ".column = ..."])) # # Putting it all together, the example query above: # # SELECT A.item_id FROM item A, type B # WHERE A.type_id = B.type_id AND B.name = 'glorp' # # would be represented as the following Select object (assuming # there is a one-to-one relationship between rows in 'item' and # collection items): # # item_table = UT.TableRef("item") # type_table = UT.TableRef("type") # # UT.Select( # [UT.MainFrom(item_table, "item_id", UT.Cardinality("1")), # UT.AuxiliaryFrom( # type_table, # UT.Expression([item_table, ".type_id = ", # type_table, ".type_id"]))], # UT.Expression([type_table, ".name = 'glorp'"])) # # Paradigms that support field-level searching (specifically, # field adaptors) should be derived from abstract class # FieldSearchableParadigm. # # A final note: the programming model used in this translator and # in the paradigms is "transparent immutable objects": objects are # transparent in that their fields are accessed directly, and once # objects are created they are never modified. # # AUTHOR # # Greg Janee # gjanee@alexandria.ucsb.edu # # HISTORY # # $Log: UniversalTranslator.py,v $ # Revision 1.11 2004/02/24 22:42:21 gjanee # Added support for relational constraints. # # Revision 1.10 2004/02/09 23:57:20 gjanee # Added public constant 'extendedTextualOperators'. # # Revision 1.9 2004/02/03 18:17:31 gjanee # Added the 'rewriteSetOpsAsSubqueries' option. # # Revision 1.8 2003/10/28 00:11:37 gjanee # Added abstract class 'FieldSearchableParadigm'. The new policy # on translation of field-level constraints is as follows. All # paradigms that support field-level searching (that is, implement # at least the 'translateFieldAtomic' method), and field adaptors # specifically, should be derived from 'FieldSearchableParadigm'. # A caller invoking a paradigm to translate one or more # field-level constraints should use a field-level method (e.g., # 'translateFieldAtomic') if the paradigm supports field-level # searching, and a bucket-level method (e.g., # 'translateBucketBoolean') otherwise. If a paradigm does not # support field-level searching, its bucket-level methods will be # called (per above) and it should treat all constraints as being # bucket-level, i.e., it should simply ignore the field # specifications in constraints. # # Revision 1.7 2003/01/28 18:19:31 gjanee # Added paradigm convenience functions 'constantTrueQuery' and # 'constantFalseQuery'. # # Revision 1.6 2003/01/23 21:57:20 gjanee # Added a 'nullable' attribute to the Cardinality object to # properly support NULLs (or more accurately, UNKNOWNs). # Clarified and made consistent the programming model used in this # translator and in the paradigms. # # Revision 1.5 2002/11/04 22:44:14 gjanee # Adaptors rely on the convention that paradigms treat all # constraints passed to 'translateBucketAtomic' and # 'translateBucketBoolean' as being bucket-level, even if they are # in fact field-level. Thus, the default implementations of these # methods must behave likewise, and in particular they must not # perform any assertion checks on constraint.getField(). # # Revision 1.4 2002/10/30 04:11:52 gjanee # Added 'protectLikeWildcards'. # # Revision 1.3 2002/10/15 16:55:50 gjanee # Refactored the unification code out of function # '_translateBooleanConstraint' and into a separate function, # '_unify', to allow selected paradigms to access that # functionality. Added 'set...' methods to class Cardinality. # Added the paradigm convenience functions 'protectQuotes' and # 'falsify'. # # Revision 1.2 2002/10/03 21:31:51 gjanee # Added conventional representations for the constants TRUE and # FALSE. To accommodate constant expressions, removed the Select # object's assertion check that each of the tables in its 'froms' # list must be referenced by either the object's expression or an # auxiliary table join condition. # # Revision 1.1 2002/10/01 17:35:40 gjanee # Initial revision # # ----------------------------------------------------------------------------- # IMPORTS # ----------------------------------------------------------------------------- import exceptions import string import types import edu.ucsb.adl.middleware M = edu.ucsb.adl.middleware import edu.ucsb.adl.bucket99 B99 = edu.ucsb.adl.bucket99 # ----------------------------------------------------------------------------- # ASSERTIONS # ----------------------------------------------------------------------------- def assertType (object, type): assert isinstance(object, type), "type error: expecting " + str(type) +\ ", got " + str(object.__class__) def assertPolytype (object, types): if __debug__: for type in types: if isinstance(object, type): return s = "" for type in types: if s != "": s += ", " s += str(type) assert 0, "type error: expecting one of {" + s + "}, got " +\ str(object.__class__) def assertListElementType (list, type): if __debug__: for object in list: assertType(object, type) def assertListElementPolytype (list, types): if __debug__: for object in list: assertPolytype(object, types) def assertDictionaryElementType (dictionary, keyType, valueType): if __debug__: assertListElementType(dictionary.keys(), keyType) assertListElementType(dictionary.values(), valueType) def assertBooleanOperator (operator): if __debug__: assertType(operator, types.StringType) assert operator in ["AND", "OR", "AND NOT"], "unrecognized " +\ "boolean operator: " + operator def assertSetOperator (operator): if __debug__: assertType(operator, types.StringType) assert operator in ["UNION", "INTERSECT", "EXCEPT"], "unrecognized " +\ "set operator: " + operator def assertBooleanOperatorOperandConsistency (operator, list): if __debug__: if len(list) == 1: s = "" else: s = "s" assert ((operator == "AND" or operator == "OR") and len(list) >= 2) or\ (operator == "AND NOT" and len(list) == 2), "boolean operator/" +\ "operand mismatch: " + operator + " with " + str(len(list)) +\ " operand" + s def assertSetOperatorOperandConsistency (operator, list): if __debug__: if len(list) == 1: s = "" else: s = "s" assert ((operator == "UNION" or operator == "INTERSECT") and\ len(list) >= 2) or (operator == "EXCEPT" and len(list) == 2),\ "set operator/operand mismatch: " + operator + " with " +\ str(len(list)) + " operand" + s def assertListElementCommonValue (list, functor): if __debug__: if len(list) > 0: value = functor(list[0]) i = 0 for object in list: assert functor(object) == value, "list element common " +\ "value check failed: '" + str(value) + "' (element 0) " +\ "!= '" + str(functor(object)) + "' (element " + str(i) +\ ")" i += 1 def assertListElementPredicateAll (list, functor): if __debug__: i = 0 for object in list: assert functor(object), "list element predicate failed: " +\ "element " + str(i) i += 1 def assertListElementPredicateOneOrMore (list, functor): if __debug__: for object in list: if functor(object): return assert 0, "no list element satisfies predicate" def assertListNonempty (list): assert len(list) > 0, "empty list" def assertDictionaryNonempty (dictionary): assert len(dictionary) > 0, "empty dictionary" def assertListNoDuplicates (list): if __debug__: d = {} for object in list: assert not d.has_key(object), "duplicate list element: " +\ str(object) d[object] = "x" def assertSelectOrSimpleDisjunction (item): assertPolytype(item, [Select, Query]) if isinstance(item, Query): assert item.operator == "UNION", "query is not a disjunction" assertListElementType(item.list, Select) def unhandledCase (): assert 0, "unhandled case" def abstractMethod (): assert 0, "abstract method not implemented" # ----------------------------------------------------------------------------- # EXCEPTIONS # ----------------------------------------------------------------------------- class QueryError (exceptions.Exception): def __init__ (self, *args): self.args = args self.message = args[0] # ----------------------------------------------------------------------------- # PUBLIC CONSTANTS # ----------------------------------------------------------------------------- standardSpatialOperators = ["contains", "is-contained-in", "overlaps"] standardTemporalOperators = ["contains", "is-contained-in", "overlaps"] standardHierarchicalOperators = ["is-a"] standardTextualOperators = ["contains-all-words", "contains-any-words", "contains-phrase"] extendedTextualOperators = standardTextualOperators +\ ["equals", "matches-pattern"] standardIdentificationOperators = ["matches"] standardNumericOperators = ["equals", "not-equals", "less-than", "less-than-or-equal-to", "greater-than", "greater-than-or-equal-to"] # ----------------------------------------------------------------------------- # QUERIES # ----------------------------------------------------------------------------- class Cardinality: def __init__ (self, value, nullable=0): assertType(value, types.StringType) assert value in ["1", "1?", "1+", "0+"], "unrecognized " +\ "cardinality: " + value assertType(nullable, types.IntType) self.value = value self.nullable = nullable def isOne (self): return self.value == "1" or self.value == "1?" def isMany (self): return self.value == "1+" or self.value == "0+" def isOptional (self): return self.value == "1?" or self.value == "0+" def isMandatory (self): return self.value == "1" or self.value == "1+" def isNullable (self): return self.nullable != 0 def setOne (self): if self.isOptional(): return Cardinality("1?", self.nullable) else: return Cardinality("1", self.nullable) def setMany (self): if self.isOptional(): return Cardinality("0+", self.nullable) else: return Cardinality("1+", self.nullable) def setOptional (self): if self.isOne(): return Cardinality("1?", self.nullable) else: return Cardinality("0+", self.nullable) def setMandatory (self): if self.isOne(): return Cardinality("1", self.nullable) else: return Cardinality("1+", self.nullable) def clone (self): return Cardinality(self.value, self.nullable) class TableRef: def __init__ (self, name): assertType(name, types.StringType) self.name = name # A key point in understanding the following: 'clone' methods return # shallow copies, and therefore preserve references to TableRef # objects; 'copy' methods return deep copies, and in particular, # create new TableRef objects. (Maybe the names should be reversed, I # dunno...) class From: def __init__ (self, table): assertType(table, TableRef) self.table = table def clone (self): return From(self.table) def copy (self): return From(TableRef(self.table.name)) class MainFrom (From): def __init__ (self, table, idColumn, cardinality): From.__init__(self, table) assertType(idColumn, types.StringType) assertType(cardinality, Cardinality) self.idColumn = idColumn self.cardinality = cardinality def joinableIdColumn (self): return self.idColumn def selectableIdColumn (self): return self.idColumn def clone (self): return MainFrom(self.table, self.idColumn, self.cardinality) def copy (self): return MainFrom(TableRef(self.table.name), self.idColumn, self.cardinality.clone()) class ExternalIdTable (MainFrom): def __init__ (self, table, internalIdColumn, externalIdColumn): MainFrom.__init__(self, TableRef(table), internalIdColumn, Cardinality("1+")) assertType(externalIdColumn, types.StringType) self.externalIdColumn = externalIdColumn def selectableIdColumn (self): return self.externalIdColumn def copy (self): return ExternalIdTable(self.table.name, self.idColumn, self.externalIdColumn) def clone (self): e = self.copy() e.table = self.table e.cardinality = self.cardinality return e class AuxiliaryFrom (From): def __init__ (self, table, joinCondition): From.__init__(self, table) assertType(joinCondition, Expression) self.joinCondition = joinCondition def clone (self): return AuxiliaryFrom(self.table, self.joinCondition) def copy (self): # Returns only a partial copy! This method should be called # from class Select only. return AuxiliaryFrom(TableRef(self.table.name), self.joinCondition) class Expression: def __init__ (self, list): assertType(list, types.ListType) assertListElementPolytype(list, [types.StringType, TableRef, Select, Query]) self.outermostOperator = "atom" self.list = list def isEmpty (self): return len(self.list) == 0 def isConstant (self): return len(self.list) == 1 and\ isinstance(self.list[0], types.StringType) and\ (self.list[0] == "1 = 1" or self.list[0] == "1 = 0") def combine (self, operator, expression): assertBooleanOperator(operator) assertType(expression, Expression) if operator == "AND NOT": return self.combine("AND", expression.negate()) if expression.isEmpty(): return self if self.isEmpty(): return expression if expression.isConstant(): if operator == "AND" and expression.list[0] == "1 = 0": return Expression(["1 = 0"]) elif operator == "OR" and expression.list[0] == "1 = 1": return Expression(["1 = 1"]) else: return self if self.isConstant(): if operator == "AND" and self.list[0] == "1 = 0": return Expression(["1 = 0"]) elif operator == "OR" and self.list[0] == "1 = 1": return Expression(["1 = 1"]) else: return expression left = self.list if self.outermostOperator != "atom" and\ self.outermostOperator != operator: left = ["("] + left + [")"] right = expression.list if expression.outermostOperator != "atom" and\ expression.outermostOperator != operator: right = ["("] + right + [")"] e = Expression([]) e.outermostOperator = operator e.list = left + [" " + operator + " "] + right return e def negate (self): assert not self.isEmpty(), "cannot negate empty expression" if self.isConstant(): if self.list[0] == "1 = 1": return Expression(["1 = 0"]) else: return Expression(["1 = 1"]) else: e = Expression([]) e.list = ["NOT ("] + self.list + [")"] return e def render (self, prefixes): assertType(prefixes, types.DictionaryType) assertDictionaryElementType(prefixes, TableRef, types.StringType) assert not self.isEmpty(), "cannot render empty expression" s = "" for item in self.list: if isinstance(item, types.StringType): s += item elif isinstance(item, TableRef): assert prefixes.has_key(item), "referenced table has no " +\ "prefix: " + item.name s += prefixes[item] elif isinstance(item, Select) or isinstance(item, Query): s += "(" + item.renderGivenPrefixes(prefixes) + ")" else: unhandledCase() return s def getTableRefs (self): tables = [] for item in self.list: if isinstance(item, TableRef): if item not in tables: tables += [item] return tables def getTableRefsRecursive (self): tables = self.getTableRefs() for item in self.list: if isinstance(item, Select) or isinstance(item, Query): rTables = item.getTableRefsRecursive() for table in rTables: if table not in tables: tables += [table] return tables def replaceTableRefs (self, old, new): assertType(old, TableRef) assertType(new, TableRef) list = self.list + [] for i in range(len(list)): if isinstance(list[i], TableRef) and list[i] == old: list[i] = new e = Expression([]) e.list = list e.outermostOperator = self.outermostOperator return e def recursiveCopySupport (self): # For use by class Select only. newList = [] for item in self.list: if isinstance(item, Select) or isinstance(item, Query): newList += [item.copy()] else: newList += [item] e = Expression([]) e.list = newList e.outermostOperator = self.outermostOperator return e class Select: def __init__ (self, froms, expression): assertType(froms, types.ListType) assertListElementType(froms, From) assertListNonempty(froms) if __debug__: declaredTables = [] for f in froms: declaredTables += [f.table] assertListNoDuplicates(declaredTables) assertListElementPredicateOneOrMore(froms, lambda f: isinstance(f, MainFrom)) assertType(expression, Expression) assert not expression.isEmpty(), "empty expression" if __debug__: tables = expression.getTableRefs() for f in froms: if isinstance(f, AuxiliaryFrom): assert not f.joinCondition.isEmpty(),\ "empty auxiliary table join condition" for table in f.joinCondition.getTableRefs(): if table not in tables: tables += [table] for table in tables: assert table in declaredTables, "referenced table not " +\ "declared in 'froms' list: " + table.name self.froms = froms self.expression = expression self.optimizerHint = None def getTableRefsRecursive (self): tables = self.expression.getTableRefsRecursive() for f in self.froms: if f.table not in tables: tables += [f.table] if isinstance(f, AuxiliaryFrom): for table in f.joinCondition.getTableRefsRecursive(): if table not in tables: tables += [table] return tables def render (self): prefixes = {} i = 0 tables = self.getTableRefsRecursive() for table in tables: if i < 26: prefixes[table] = chr(ord("A")+i) else: prefixes[table] = "T" + str(i+1) i += 1 return self.renderGivenPrefixes(prefixes) def renderGivenPrefixes (self, prefixes): assertType(prefixes, types.DictionaryType) assertDictionaryElementType(prefixes, TableRef, types.StringType) firstMainFrom = None s = "" for f in self.froms: if firstMainFrom == None and isinstance(f, MainFrom): firstMainFrom = f if s != "": s += ", " assert prefixes.has_key(f.table), "referenced table has no " +\ "prefix: " + f.table.name s += f.table.name + " " + prefixes[f.table] t = "SELECT " if self.optimizerHint != None: t += self.optimizerHint + " " s = t + prefixes[firstMainFrom.table] + "." +\ firstMainFrom.selectableIdColumn() + " FROM " + s + " WHERE " e = Expression([]) for f in self.froms: if isinstance(f, MainFrom): if f != firstMainFrom: e = e.combine("AND", Expression([f.table, "." + f.joinableIdColumn() + " = ", firstMainFrom.table, "." + firstMainFrom.joinableIdColumn()])) elif isinstance(f, AuxiliaryFrom): e = e.combine("AND", f.joinCondition) else: unhandledCase() e = e.combine("AND", self.expression) return s + e.render(prefixes) def prependExternalIdTable (self, f): assertType(f, ExternalIdTable) s = Select([f] + self.froms, self.expression) s.optimizerHint = self.optimizerHint return s def setOptimizerHint (self, optimizerHint): assertType(optimizerHint, types.StringType) s = Select(self.froms, self.expression) s.optimizerHint = optimizerHint return s def copy (self): newFroms = [] for f in self.froms: newFroms += [f.copy()] newExpression = self.expression i = 0 while i < len(newFroms): newExpression = newExpression.replaceTableRefs(self.froms[i].table, newFroms[i].table) for f in newFroms: if isinstance(f, AuxiliaryFrom): f.joinCondition = f.joinCondition.replaceTableRefs( self.froms[i].table, newFroms[i].table) i += 1 newExpression = newExpression.recursiveCopySupport() for f in newFroms: if isinstance(f, AuxiliaryFrom): f.joinCondition = f.joinCondition.recursiveCopySupport() s = Select(newFroms, newExpression) s.optimizerHint = self.optimizerHint return s class Query: def __init__ (self, operator, list): assertSetOperator(operator) assertType(list, types.ListType) assertListElementPolytype(list, [Select, Query]) assertSetOperatorOperandConsistency(operator, list) self.operator = operator self.list = list def getTableRefsRecursive (self): tables = [] for item in self.list: for table in item.getTableRefsRecursive(): if table not in tables: tables += [table] return tables def render (self): prefixes = {} i = 0 tables = self.getTableRefsRecursive() for table in tables: if i < 26: prefixes[table] = chr(ord("A")+i) else: prefixes[table] = "T" + str(i+1) i += 1 return self.renderGivenPrefixes(prefixes) def renderGivenPrefixes (self, prefixes): assertType(prefixes, types.DictionaryType) assertDictionaryElementType(prefixes, TableRef, types.StringType) s = "" for item in self.list: if s != "": s += " " + self.operator + " " if self.operator == "UNION" or self.operator == "INTERSECT": s += "ALL " s += "(" + item.renderGivenPrefixes(prefixes) + ")" return s def prependExternalIdTable (self, f): assertType(f, ExternalIdTable) list = [] for item in self.list: list += [item.prependExternalIdTable(f.copy())] return Query(self.operator, list) def setOptimizerHint (self, optimizerHint): assertType(optimizerHint, types.StringType) list = [] for item in self.list: list += [item.setOptimizerHint(optimizerHint)] return Query(self.operator, list) def copy (self): newList = [] for item in self.list: newList += [item.copy()] return Query(self.operator, newList) # ----------------------------------------------------------------------------- # PARADIGMS # ----------------------------------------------------------------------------- class Paradigm: def translateBucketAtomic (self, constraint, vocabularies): assertType(constraint, M.Query.SimpleConstraint) # assert constraint.getField() == None assertType(vocabularies, types.ArrayType) assertListElementType(vocabularies, B99.Vocabulary) abstractMethod() def translateBucketBoolean (self, operator, constraints, vocabularies): assertBooleanOperator(operator) assertType(constraints, types.ListType) assertListElementType(constraints, M.Query.SimpleConstraint) # (Furthermore, all constraints are instances of the same # subclass of SimpleConstraint.) assertBooleanOperatorOperandConsistency(operator, constraints) assertListElementCommonValue(constraints, lambda c: c.getBucket()) # assertListElementPredicateAll(constraints, # lambda c: c.getField() == None) assertType(vocabularies, types.ArrayType) assertListElementType(vocabularies, B99.Vocabulary) return None def translateFieldAtomic (self, constraint, vocabularies): assertType(constraint, M.Query.SimpleConstraint) assert constraint.getField() != None assertType(vocabularies, types.ArrayType) assertListElementType(vocabularies, B99.Vocabulary) raise QueryError, "bucket does not support field constraints: " +\ constraint.getBucket() def translateFieldBoolean (self, operator, constraints, vocabularies): assertBooleanOperator(operator) assertType(constraints, types.ListType) assertListElementType(constraints, M.Query.SimpleConstraint) # (Furthermore, all constraints are instances of the same # subclass of SimpleConstraint.) assertBooleanOperatorOperandConsistency(operator, constraints) assertListElementCommonValue(constraints, lambda c: c.getBucket()) assertListElementPredicateAll(constraints, lambda c: c.getField() != None) assertListElementCommonValue(constraints, lambda c: c.getField().uri) assertType(vocabularies, types.ArrayType) assertListElementType(vocabularies, B99.Vocabulary) return None class FieldSearchableParadigm (Paradigm): pass # ----------------------------------------------------------------------------- # BUCKETS & BUCKET TYPES # ----------------------------------------------------------------------------- _bucketTypeConstraintClassMapping = { "spatial" : M.Query.SpatialConstraint, "temporal" : M.Query.TemporalConstraint, "hierarchical" : M.Query.HierarchicalConstraint, "textual" : M.Query.TextualConstraint, "identification" : M.Query.IdentificationConstraint, "numeric" : M.Query.NumericConstraint, "relational" : M.Query.RelationalConstraint } _constraintClassBucketTypeMapping = { M.Query.SpatialConstraint : "spatial", M.Query.TemporalConstraint : "temporal", M.Query.HierarchicalConstraint : "hierarchical", M.Query.TextualConstraint : "textual", M.Query.IdentificationConstraint : "identification", M.Query.NumericConstraint : "numeric", M.Query.RelationalConstraint : "relational" } def _constraintType (constraint): assertType(constraint, M.Query.SimpleConstraint) for type in _constraintClassBucketTypeMapping.keys(): if isinstance(constraint, type): return _constraintClassBucketTypeMapping[type] assert 0, "unrecognized constraint type: " +\ constraint.getClass().getName() class Bucket: def __init__ (self, type, operators, paradigm): assertType(type, types.StringType) assert type in _bucketTypeConstraintClassMapping.keys(),\ "unrecognized bucket type: " + type assertType(operators, types.ListType) assertListElementType(operators, types.StringType) assertType(paradigm, Paradigm) self.type = type self.operators = operators self.paradigm = paradigm # ----------------------------------------------------------------------------- # QUERY TRANSLATION # ----------------------------------------------------------------------------- def _translateAtomicConstraint (constraint, buckets, vocabularies): assertType(constraint, M.Query.SimpleConstraint) assertType(buckets, types.DictionaryType) assertDictionaryElementType(buckets, types.StringType, Bucket) assertType(vocabularies, types.ArrayType) assertListElementType(vocabularies, B99.Vocabulary) if not buckets.has_key(constraint.getBucket()): raise QueryError, "unsupported bucket: " + constraint.getBucket() bucket = buckets[constraint.getBucket()] if _constraintType(constraint) != bucket.type: raise QueryError, "unsupported constraint type for bucket '" +\ constraint.getBucket() + "': " + _constraintType(constraint) if constraint.getOperator() not in bucket.operators: raise QueryError, "unsupported operator in constraint on bucket '" +\ constraint.getBucket() + "': " + constraint.getOperator() if constraint.getField() != None: query = bucket.paradigm.translateFieldAtomic(constraint, vocabularies) else: query = bucket.paradigm.translateBucketAtomic(constraint, vocabularies) assertPolytype(query, [Select, Query]) return query def _translateBooleanConstraintCommonBucketAndField (operator, constraints, buckets, vocabularies): assertBooleanOperator(operator) assertType(constraints, types.ListType) assertListElementType(constraints, M.Query.SimpleConstraint) assertBooleanOperatorOperandConsistency(operator, constraints) assertListElementCommonValue(constraints, lambda c: c.getBucket()) assertListElementCommonValue(constraints, lambda c: _constraintType(c)) if __debug__: if constraints[0].getField() != None: assertListElementPredicateAll(constraints, lambda c: c.getField() != None) assertListElementCommonValue(constraints, lambda c: c.getField().uri) else: assertListElementPredicateAll(constraints, lambda c: c.getField() == None) assertType(buckets, types.DictionaryType) assertDictionaryElementType(buckets, types.StringType, Bucket) assertType(vocabularies, types.ArrayType) assertListElementType(vocabularies, B99.Vocabulary) if not buckets.has_key(constraints[0].getBucket()): raise QueryError, "unsupported bucket: " + constraints[0].getBucket() bucket = buckets[constraints[0].getBucket()] if _constraintType(constraints[0]) != bucket.type: raise QueryError, "unsupported constraint type for bucket '" +\ constraints[0].getBucket() + "': " +\ _constraintType(constraints[0]) for constraint in constraints: if constraint.getOperator() not in bucket.operators: raise QueryError, "unsupported operator in constraint on " +\ "bucket '" + constraint.getBucket() + "': " +\ constraint.getOperator() if constraints[0].getField() != None: query = bucket.paradigm.translateFieldBoolean(operator, constraints, vocabularies) else: query = bucket.paradigm.translateBucketBoolean(operator, constraints, vocabularies) assertPolytype(query, [Select, Query, types.NoneType]) return query _booleanOperatorMapping = { M.Query.BooleanConstraint.AND : "AND", M.Query.BooleanConstraint.OR : "OR", M.Query.BooleanConstraint.AND_NOT : "AND NOT" } def _translateBooleanConstraint (constraint, buckets, vocabularies): assertType(constraint, M.Query.BooleanConstraint) assertType(buckets, types.DictionaryType) assertDictionaryElementType(buckets, types.StringType, Bucket) assertType(vocabularies, types.ArrayType) assertListElementType(vocabularies, B99.Vocabulary) assert constraint.getOperator() in _booleanOperatorMapping.keys(),\ "unrecognized boolean operator code: " + str(constraint.getOperator()) operator = _booleanOperatorMapping[constraint.getOperator()] assertType(constraint.getOperands(), types.ArrayType) assertListElementType(constraint.getOperands(), M.Query.Constraint) operands = list(constraint.getOperands()) assertBooleanOperatorOperandConsistency(operator, operands) queries = [] while len(operands) > 0: operand = operands.pop(0) if isinstance(operand, M.Query.BooleanConstraint): queries += [_translateBooleanConstraint(operand, buckets, vocabularies)] else: inSet = [operand] outSet = [] for o in operands: if isinstance(o, M.Query.SimpleConstraint) and\ o.getBucket() == operand.getBucket() and\ ((o.getField() != None and operand.getField() != None and\ o.getField().uri == operand.getField().uri) or\ (o.getField() == None and\ operand.getField() == None)) and\ _constraintType(o) == _constraintType(operand): inSet += [o] else: outSet += [o] if len(inSet) >= 2: query = _translateBooleanConstraintCommonBucketAndField( operator, inSet, buckets, vocabularies) if query != None: queries += [query] operands = outSet else: queries += [_translateAtomicConstraint(operand, buckets, vocabularies)] else: queries += [_translateAtomicConstraint(operand, buckets, vocabularies)] return _unify(operator, queries) def _translateConstraint (constraint, buckets, vocabularies): assertType(constraint, M.Query.Constraint) assertType(buckets, types.DictionaryType) assertDictionaryElementType(buckets, types.StringType, Bucket) assertType(vocabularies, types.ArrayType) assertListElementType(vocabularies, B99.Vocabulary) if isinstance(constraint, M.Query.BooleanConstraint): query = _translateBooleanConstraint(constraint, buckets, vocabularies) else: query = _translateAtomicConstraint(constraint, buckets, vocabularies) return query class Translator: def __init__ (self, buckets, externalIdTable=None, optimizerHint=None, rewriteSetOpsAsSubqueries=0): assertType(buckets, types.DictionaryType) assertDictionaryElementType(buckets, types.StringType, Bucket) assertPolytype(externalIdTable, [ExternalIdTable, types.NoneType]) assertPolytype(optimizerHint, [types.StringType, types.NoneType]) assertType(rewriteSetOpsAsSubqueries, types.IntType) self.buckets = buckets self.externalIdTable = externalIdTable self.optimizerHint = optimizerHint self.rewriteSetOpsAsSubqueries = rewriteSetOpsAsSubqueries def translate (self, constraint, vocabularies): assertType(constraint, M.Query.Constraint) assertType(vocabularies, types.ArrayType) assertListElementType(vocabularies, B99.Vocabulary) try: query = _translateConstraint(constraint, self.buckets, vocabularies) except QueryError, e: return M.UnsupportedException( "[UniversalTranslator.py] translation error: " + e.message) if self.rewriteSetOpsAsSubqueries: query = _removeSetOperators(query) if self.externalIdTable != None: query = query.prependExternalIdTable(self.externalIdTable) if self.optimizerHint != None: query = query.setOptimizerHint(self.optimizerHint) return query.render() # ----------------------------------------------------------------------------- # QUERY REWRITING # ----------------------------------------------------------------------------- def _removeSetOperators (query): assertPolytype(query, [Select, Query]) if isinstance(query, Select): return query else: newList = [] for item in query.list: newList += [_removeSetOperators(item)] while len(newList) >= 2: newList[1] = _rewriteSetOperatorAsSubquery(newList[0], query.operator, newList[1]) newList = newList[1:] return newList[0] def _rewriteSetOperatorAsSubquery (left, operator, right): assertSelectOrSimpleDisjunction(left) assertSetOperator(operator) assertSelectOrSimpleDisjunction(right) # Rewrites two queries combined with a set operator as a single # query. Generally this is done by adding an AND IN (...) or AND # NOT IN (...) subquery clause to one of the queries. Both the # input queries to this function, and the returned query, may be # either a single Select or a simple disjunction of Selects. if operator != "EXCEPT": if isinstance(left, Select) and isinstance(right, Query): x = left left = right right = x if operator == "UNION": if isinstance(left, Select): return Query("UNION", [left, right]) elif isinstance(right, Select): return Query("UNION", left.list + [right]) else: return Query("UNION", left.list + right.list) elif operator == "INTERSECT": if isinstance(left, Select): return _combineSelects(left, right, 0) elif isinstance(right, Select): newList = [] for q in left.list: newList += [_combineSelects(q, right.copy(), 0)] return Query("UNION", newList) else: newList = [] for q in left.list: for r in right.list: newList += [_combineSelects(q.copy(), r.copy(), 0)] return Query("UNION", newList) elif operator == "EXCEPT": if isinstance(left, Select): if isinstance(right, Select): return _combineSelects(left, right, 1) else: for q in right.list: left = _combineSelects(left, q, 1) return left else: if isinstance(right, Select): newList = [] for q in left.list: newList += [_combineSelects(q, right.copy(), 1)] return Query("UNION", newList) else: newList = [] for q in left.list: q = q.copy() for r in right.list: q = _combineSelects(q, r.copy(), 1) newList += [q] return Query("UNION", newList) else: unhandledCase() def _combineSelects (outer, inner, negate): assertType(outer, Select) assertType(inner, Select) assertType(negate, types.IntType) mainFrom = None for f in outer.froms: if isinstance(f, MainFrom): mainFrom = f break assert mainFrom != None, "no main table" if negate: negateClause = " NOT" else: negateClause = "" return Select( outer.froms, outer.expression.combine("AND", Expression([ mainFrom.table, "." + mainFrom.joinableIdColumn() + negateClause + " IN ", inner]))) # ----------------------------------------------------------------------------- # UNIFICATION # ----------------------------------------------------------------------------- _booleanOperatorSetOperatorMapping = { "AND" : "INTERSECT", "OR" : "UNION", "AND NOT" : "EXCEPT" } class _EscapeHatch (exceptions.Exception): pass def _unify (operator, queries): # Note: this function, nominally internal to this module, is also # called by the Adaptor_Union and Adaptor_Concatenation paradigms. assertBooleanOperator(operator) assertType(queries, types.ListType) assertListElementPolytype(queries, [Select, Query]) queries = queries + [] didSomething = 1 while didSomething: didSomething = 0 try: for i in range(len(queries)): if isinstance(queries[i], Select): for j in range(i+1, len(queries)): if isinstance(queries[j], Select): if operator == "AND": combinedQuery = _unifyBooleanAnd( queries[i], queries[j]) elif operator == "OR": combinedQuery = _unifyBooleanOr( queries[i], queries[j]) elif operator == "AND NOT": combinedQuery = _unifyBooleanAndNot( queries[i], queries[j]) else: unhandledCase() if combinedQuery != None: del queries[j] queries[i] = combinedQuery raise _EscapeHatch except _EscapeHatch: didSomething = 1 if len(queries) >= 2: operator = _booleanOperatorSetOperatorMapping[operator] newQueries = [] for query in queries: if isinstance(query, Select) or operator == "EXCEPT": newQueries += [query] else: if query.operator == operator: for item in query.list: newQueries += [item] else: newQueries += [query] return Query(operator, newQueries) else: return queries[0] def _unifyMultipleTableReferences (select): assertType(select, Select) # If a main table is referenced multiple times in a SELECT # statement, each of its references T for which # T.cardinality.isOne() can be unified into a single reference. # Furthermore, a reference T1 for which T1.cardinality.isOne() can # be unified into a reference T2 (to the same table, of course) # for which T2.cardinality.isMany(). Thus if there are M isOne() # references and N isMany() references to a given table T in the # original SELECT statement, M >= 0 and N >= 0 and MAX(M, N) > 0, # then after calling this function there will be MAX(N, 1) # references to T. froms = select.froms + [] expression = select.expression i = 0 while i < len(froms): if isinstance(froms[i], MainFrom): j = i + 1 while j < len(froms): if isinstance(froms[j], MainFrom) and\ froms[j].table.name == froms[i].table.name and\ froms[j].idColumn == froms[i].idColumn: if froms[i].cardinality.isOne() and\ froms[j].cardinality.isMany(): f = froms[i] froms[i] = froms[j] froms[j] = f if froms[j].cardinality.isOne(): if froms[j].cardinality.isNullable() and\ not froms[i].cardinality.isNullable(): froms[i] = froms[i].clone() froms[i].cardinality = froms[i].cardinality.clone() froms[i].cardinality.nullable = 1 expression = expression.replaceTableRefs( froms[j].table, froms[i].table) for k in range(len(froms)): if isinstance(froms[k], AuxiliaryFrom): froms[k] = AuxiliaryFrom(froms[k].table, froms[k].joinCondition.replaceTableRefs( froms[j].table, froms[i].table)) del froms[j] else: j += 1 else: j += 1 i += 1 return Select(froms, expression) def _unifyBooleanAnd (select1, select2): assertType(select1, Select) assertType(select2, Select) # Consider two SELECT statements S1 and S2, each querying at least # one main table, and the expression S1 INTERSECT S2. The # statements can always be INNER JOINed on main tables into a # single SELECT. froms = select1.froms + select2.froms expression = select1.expression.combine("AND", select2.expression) return _unifyMultipleTableReferences(Select(froms, expression)) def _unifyBooleanOr (select1, select2): assertType(select1, Select) assertType(select2, Select) # Consider two SELECT statements S1 and S2, each querying at least # one main table, and the expression S1 UNION S2. The statements # can be INNER JOINed on main tables only if all main table # references T have T.cardinality.isMandatory(). for f in (select1.froms + select2.froms): if isinstance(f, MainFrom): if f.cardinality.isOptional(): return None # Consider INNER JOINing two SELECT statements such that the # combined WHERE clause is the OR of the constituent WHERE # clauses. Suppose further that the SELECT statements have been # run through _unifyMultipleTableReferences, so that the remaining # main table references are separately non-unifiable. Then for # each table that is referenced by both statements, there are # three cases of unification: 1) if each statement has a (single) # reference T for which T.cardinality.isOne(), then the two # references may be unified; 2) if one statement has an isOne() # reference and the other statement has one or more isMany() # references, then the isOne() reference may be unified into one # of the isMany() references; and 3) (this case is unique to the # OR operator) if one statement has M isMany() references and the # other statement has N isMany() references, M > 0 and N > 0, then # the references may be pairwise unified into MIN(M, N) # references. select1 = _unifyMultipleTableReferences(select1) select2 = _unifyMultipleTableReferences(select2) expression = select1.expression.combine("OR", select2.expression) leftSet = [] for f in select1.froms: leftSet += [f.clone()] rightSet = [] for f in select2.froms: rightSet += [f.clone()] finalSet = [] replacements = [] while len(leftSet) > 0: if isinstance(leftSet[0], MainFrom): lSet = [leftSet.pop(0)] outSet = [] for f in leftSet: if isinstance(f, MainFrom) and\ f.table.name == lSet[0].table.name and\ f.idColumn == lSet[0].idColumn: lSet += [f] else: outSet += [f] leftSet = outSet rSet = [] outSet = [] for f in rightSet: if isinstance(f, MainFrom) and\ f.table.name == lSet[0].table.name and\ f.idColumn == lSet[0].idColumn: rSet += [f] else: outSet += [f] rightSet = outSet if len(lSet) >= len(rSet): inSet = lSet outSet = rSet else: inSet = rSet outSet = lSet finalSet += inSet for i in range(len(outSet)): replacements += [(outSet[i], inSet[i])] if outSet[i].cardinality.isMany() and\ inSet[i].cardinality.isOne(): inSet[i].cardinality = inSet[i].cardinality.setMany() if outSet[i].cardinality.isNullable() and\ not inSet[i].cardinality.isNullable(): inSet[i].cardinality = inSet[i].cardinality.clone() inSet[i].cardinality.nullable = 1 else: finalSet += [leftSet.pop(0)] finalSet += rightSet for (old, new) in replacements: expression = expression.replaceTableRefs(old.table, new.table) for f in finalSet: if isinstance(f, AuxiliaryFrom): f.joinCondition =\ f.joinCondition.replaceTableRefs(old.table, new.table) return Select(finalSet, expression) def _unifyBooleanAndNot (select1, select2): assertType(select1, Select) assertType(select2, Select) # Consider two SELECT statements S1 and S2, each querying at least # one main table, and the expression S1 EXCEPT S2. The statements # can be INNER JOINed on main tables into a single SELECT, and # main table references can be unified, if and only if no main # table reference T in S2 has T.cardinality.isOptional() or # T.cardinality.isMany() or T.cardinality.isNullable(). for f in select2.froms: if isinstance(f, MainFrom): if f.cardinality.isOptional() or f.cardinality.isMany() or\ f.cardinality.isNullable(): return None froms = select1.froms + select2.froms expression = select1.expression.combine("AND NOT", select2.expression) return _unifyMultipleTableReferences(Select(froms, expression)) # ----------------------------------------------------------------------------- # PARADIGM CONVENIENCES # ----------------------------------------------------------------------------- def protectQuotes (text): assertType(text, types.StringType) return string.replace(text, "'", "''") def protectLikeWildcards (text): assertType(text, types.StringType) return\ string.replace( string.replace( string.replace( string.replace( text, "'", "''"), "\\", "\\\\"), "%", "\%"), "_", "\_") def constantTrueQuery (table, idColumn, nullableColumn, cardinality): # Returns a constant TRUE by selecting all rows of table 'table'. # If the cardinality of the table has isNullable(), then only # those rows for which column 'nullableColumn' is not NULL are # selected. assertType(table, types.StringType) assertType(idColumn, types.StringType) assertType(nullableColumn, types.StringType) assertType(cardinality, Cardinality) table = TableRef(table) if cardinality.isNullable(): return Select( [MainFrom(table, idColumn, Cardinality(cardinality.value))], Expression([table, "." + nullableColumn + " IS NOT NULL"])) else: return Select( [MainFrom(table, idColumn, cardinality.setOne())], Expression(["1 = 1"])) def constantFalseQuery (table, idColumn, cardinality): # Returns a constant FALSE by selecting no rows of a given table. assertType(table, types.StringType) assertType(idColumn, types.StringType) assertType(cardinality, Cardinality) return Select( [MainFrom(TableRef(table), idColumn, Cardinality(cardinality.value).setOne())], Expression(["1 = 0"])) def falsify (item): # Accepts a Select or Query object and returns a new Select object # that returns zero rows. The algorithm below is greatly # complicated by the fact that we try to use a table reference # that has cardinality isMandatory(), since such references have # better joinability properties. assertPolytype(item, [Select, Query]) mainFrom = None list = [item] while len(list) > 0 and\ (mainFrom == None or mainFrom.cardinality.isOptional()): item = list.pop(0) if isinstance(item, Select): list = item.froms + list elif isinstance(item, Query): list = item.list + list else: if isinstance(item, MainFrom): mainFrom = item assert mainFrom != None, "no main table" return constantFalseQuery(mainFrom.table.name, mainFrom.idColumn, mainFrom.cardinality)