############################################################################### ## ## ## ALEXANDRIA DIGITAL LIBRARY ## ## University of California at Santa Barbara ## ## ## ## ------------------------------------------------------------------------- ## ## ## ## Copyright (c) 2005 by the Regents of the University of California ## ## All rights reserved ## ## ## ## Redistribution and use in source and binary forms, with or without ## ## modification, are permitted provided that the following conditions are ## ## met: ## ## ## ## 1. Redistributions of source code must retain the above copyright ## ## notice, this list of conditions, and the following disclaimer. ## ## ## ## 2. Redistributions in binary form must reproduce the above copyright ## ## notice, this list of conditions, and the following disclaimer in ## ## the documentation and/or other materials provided with the ## ## distribution. ## ## ## ## 3. All advertising materials mentioning features or use of this ## ## software must display the following acknowledgement: This product ## ## includes software developed by the Alexandria Digital Library, ## ## University of California at Santa Barbara, and its contributors. ## ## ## ## 4. Neither the name of the University nor the names of its ## ## contributors may be used to endorse or promote products derived ## ## from this software without specific prior written permission. ## ## ## ## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY ## ## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ## ## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE ## ## DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ## ## ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ## ## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ## ## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ## ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ## ## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ## ## ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ## ## POSSIBILITY OF SUCH DAMAGE. ## ## ## ############################################################################### # $Header: /export/home/gjanee/mm/RCS/ADL_mapper.py,v 1.5 2007/02/07 17:28:44 gjanee Exp $ # SYNOPSIS # # python mapping [-options] [input] # # mapping: # # Python script/module of the form: # # from ADL_mapper import * # input() # ...declarations... # output() # # options: # # -t # Print tracebacks on errors and warnings. # # -Dparam=value # Define a parameter. Parameters can be accessed from the # mapping using the getParam() function. # # input: # # Filename or URL of the source metadata to be mapped; if not # specified, the source metadata is read from standard input. # # DESCRIPTION # # Python module that defines the ADL mapping language. See # http://www.alexandria.ucsb.edu/~gjanee/mm/tutorial.html for more # information. # # This module requires the PyXML package, available at # http://pyxml.sourceforge.net/. # # AUTHOR # # Greg Janee # gjanee@alexandria.ucsb.edu # # HISTORY # # $Log: ADL_mapper.py,v $ # Revision 1.5 2007/02/07 17:28:44 gjanee # Added the 'join' XPath pseudo-function. # # Revision 1.4 2005/06/08 16:54:34 gjanee # Added support for XML namespaces. Added the 'namespace' # function. # # Revision 1.3 2005/03/18 20:52:56 gjanee # Added a 'separator' argument to 'consolidateTextualValues'. # Removed 'unconsolidateTextualValues', as it is no longer needed. 
#
# Revision 1.2 2005/03/10 17:21:48 gjanee
# Added some syntactic sugar.
#
# Revision 1.1 2005/02/17 18:16:54 gjanee
# Initial revision
#

import re
import sys
import traceback
import types
import xml.dom
import xml.dom.ext.reader.Sax2
import xml.xpath

# ----------------------------------------
# TYPE CHECKING
#
# A type specification ("spec") is one of:
#
#   a type                       the object must be exactly that type
#   [spec, ...]                  the object must match one alternative
#   (tuple, spec, ...)           a tuple whose elements match in order
#   (list, spec[, min[, max]])   a list of matching elements, optionally
#                                bounded in length
#   (dict, keySpec, valueSpec)   a dictionary of matching keys and values

_str = [str, unicode]


def _typecheck1(object, spec):
    """Return True if 'object' conforms to the type specification 'spec'.

    Types are compared by identity, so instances of subclasses do not
    match.  A specification of any unrecognized form matches nothing.
    """
    specType = type(spec)
    if specType is type:
        return type(object) is spec
    if specType is list:
        # A list of specifications is a set of alternatives.
        for alternative in spec:
            if _typecheck1(object, alternative):
                return True
        return False
    if specType is not tuple:
        return False
    container = spec[0]
    if container == tuple:
        # (tuple, spec1, ..., specN): exactly N elements, matched in order.
        elementSpecs = spec[1:]
        if type(object) is not tuple or len(object) != len(elementSpecs):
            return False
        for element, elementSpec in zip(object, elementSpecs):
            if not _typecheck1(element, elementSpec):
                return False
        return True
    if container == list:
        # (list, spec[, min[, max]]): the length bounds are inclusive.
        if type(object) is not list:
            return False
        if len(spec) >= 3 and len(object) < spec[2]:
            return False
        if len(spec) >= 4 and len(object) > spec[3]:
            return False
        for element in object:
            if not _typecheck1(element, spec[1]):
                return False
        return True
    if container == dict:
        if type(object) is not dict:
            return False
        for key, value in object.items():
            if not _typecheck1(key, spec[1]):
                return False
            if not _typecheck1(value, spec[2]):
                return False
        return True
    return False


def _joinSpecs(specs, separator):
    """Format each specification in 'specs' and concatenate the results,
    inserting 'separator' whenever some text has already been produced."""
    text = ""
    for s in specs:
        if text != "":
            text += separator
        text += _formatSpec(s)
    return text


def _formatSpec(spec):
    """Return a human-readable description of the type specification
    'spec', or "?" if the specification is not of a recognized form."""
    specType = type(spec)
    if specType is type:
        return str(spec)
    if specType is list:
        return _joinSpecs(spec, " or ")
    if specType is not tuple:
        return "?"
    container = spec[0]
    if container == tuple:
        return "tuple (" + _joinSpecs(spec[1:], ", ") + ")"
    if container == list:
        text = "list"
        # NOTE(review): the upper-bound test is assumed to be nested inside
        # the lower-bound test (so an unbounded list prints as plain "list");
        # confirm against the upstream file.
        if len(spec) >= 3:
            text += " " + str(spec[2]) + ".."
            if len(spec) >= 4:
                text += str(spec[3])
            else:
                text += "*"
        return text + " [" + _formatSpec(spec[1]) + "]"
    if container == dict:
        return ("dictionary {" + _formatSpec(spec[1]) + " : " +
                _formatSpec(spec[2]) + "}")
    return "?"
def _typecheck(argNum, object, spec):
    """Raise TypeError unless 'object' conforms to the specification 'spec'.

    'argNum' is the position of the argument being checked and is named
    in the error message when it is greater than zero.  The check is
    performed only while __debug__ is true.
    """
    if __debug__:
        if not _typecheck1(object, spec):
            if argNum > 0:
                prefix = "argument " + str(argNum) + ": "
            else:
                prefix = ""
            # The call form (rather than "raise TypeError, message") is
            # accepted by both Python 2 and Python 3.
            raise TypeError(prefix + "expecting " + _formatSpec(spec))


# ----------------------------------------
# UTILITIES

def _listify(x):
    """Return 'x' as a list: a copy if it already is one, otherwise a
    new single-element list containing it."""
    if type(x) is list:
        # must duplicate lists in certain cases to avoid shared storage
        return list(x)
    return [x]


def _plural(n):
    """Return the plural suffix ("s" or "") appropriate for a count of 'n'."""
    if n == 1:
        return ""
    return "s"


# ----------------------------------------
# ERROR HANDLING

_printTracebacks = False


def fatal(message):
    """Write 'message' to standard error as a fatal error, print a stack
    trace if tracebacks are enabled, and exit with status 1."""
    _typecheck(1, message, _str)
    sys.stderr.write(sys.argv[0] + ": FATAL ERROR: " + message + "\n")
    if _printTracebacks:
        traceback.print_stack()
    sys.exit(1)


def warning(message):
    """Write 'message' to standard error as a warning and print a stack
    trace if tracebacks are enabled."""
    _typecheck(1, message, _str)
    sys.stderr.write(sys.argv[0] + ": WARNING: " + message + "\n")
    if _printTracebacks:
        traceback.print_stack()


def _xmlException(activity):
    """Report the exception currently being handled as a fatal error
    that occurred while performing 'activity'."""
    excType, excValue = sys.exc_info()[:2]
    fatal("exception raised while " + activity + ": " +
          str(excType) + ": " + str(excValue))


def _usage():
    """Write the command line synopsis to standard error and exit with
    status 1."""
    # NOTE(review): the leading whitespace inside these literals is
    # reproduced as found; it may originally have been wider indentation.
    sys.stderr.write(
        "usage: python mapping [-options] [input]\n"
        "\n"
        "mapping:\n"
        "\n"
        " Python script/module of the form:\n"
        "\n"
        " from ADL_mapper import *\n"
        " input()\n"
        " ...declarations...\n"
        " output()\n"
        "\n"
        "options:\n"
        "\n"
        " -t\n"
        " Print tracebacks on errors and warnings.\n"
        "\n"
        " -Dparam=value\n"
        " Define a parameter. Parameters can be accessed from the\n"
        " mapping using the getParam() function.\n"
        "\n"
        "input:\n"
        "\n"
        " Filename or URL of the source metadata to be mapped; if not\n"
        " specified, the source metadata is read from standard input.\n")
    sys.exit(1)


# ----------------------------------------
# BUCKET TYPES

# _bucketTypes ::= { name : (validator, encoder), ... }
#   name ::= _str, e.g., "temporal"
#   validator ::= func(bucket, field, value, strict) -> retval
#     bucket ::= _str, e.g., "adl:dates"
#     field ::= (fieldName, fieldUri) or None
#       fieldName ::= _str, e.g., "[DC] Title"
#       fieldUri ::= _str, e.g., "http://purl.org/dc/elements/1.1/title"
#     value ::= tuple
#     strict ::= bool
#     retval ::= (field, value) or None
#   encoder ::= func(document, field, value) -> element
#     document ::= DOM document
#     element ::= DOM element

_bucketTypes = {}


def bucketType(name, validator, encoder):
    """Register the bucket type 'name' with its validator and encoder
    functions, replacing any earlier registration of that name."""
    _typecheck(1, name, _str)
    _typecheck(2, validator, types.FunctionType)
    _typecheck(3, encoder, types.FunctionType)
    _bucketTypes[name] = (validator, encoder)


# ----------------------------------------
# BUCKETS

# _buckets ::= { name : type, ... }
#   name ::= _str, e.g., "adl:dates"
#   type ::= _str, e.g., "temporal"

_buckets = {}


def bucket(name, type):
    """Declare the bucket 'name' to be of the bucket type 'type'.  It is
    a fatal error if the bucket type has not been registered."""
    _typecheck(1, name, _str)
    _typecheck(2, type, _str)
    if type not in _bucketTypes:
        fatal("unrecognized bucket type: " + type)
    _buckets[name] = type


# ----------------------------------------
# VOCABULARIES

# _vocabularies ::= { name : ([bucket, ...], termAncestorMap), ... }
#   name ::= _str, e.g., "ADL Object Type Thesaurus"
#   bucket ::= _str, e.g., "adl:types"
#   termAncestorMap ::= { term : [term, ...], ... }
#     term ::= _str, e.g., "maps"

_vocabularies = {}


def _walkGraph(vocabularyName, path, term, map):
    """Record in 'map' the ancestors of 'term' and, recursively, of its
    descendants.

    'term' is either a term name or a (name, [child, ...]) tuple.  'path'
    lists the names of the terms enclosing 'term', outermost first; it is
    extended while the children are visited and restored afterwards.  A
    term that appears in its own path is a fatal error.
    """
    if type(term) is str or type(term) is unicode:
        name = term
    else:
        name = term[0]
    # A term may be visited more than once; its ancestors accumulate.
    ancestors = map.get(name, [])
    for ancestor in path:
        if ancestor == name:
            fatal("cycle detected in vocabulary '" + vocabularyName +
                  "': term '" + name + "'")
        if ancestor not in ancestors:
            ancestors.append(ancestor)
    map[name] = ancestors
    if type(term) is tuple:
        path.append(name)
        for child in term[1]:
            _walkGraph(vocabularyName, path, child, map)
        path.pop()


# This is the real tree type specification, but we can't use it
# because it will generate an infinitely recursive error message.
_treeTypespec = [_str, (tuple, _str, (list, ["x"], 1))]
# Replace the "x" placeholder with the specification itself, which makes
# the specification recursive.
_treeTypespec[1][2][1][0] = _treeTypespec


def vocabulary(name, buckets, terms):
    """Declare the vocabulary 'name' and associate it with 'buckets'.

    'buckets' is a bucket name or a non-empty list of bucket names; each
    must be a declared bucket of type "hierarchical".  'terms' is a list
    of terms, each either a term name or a (name, [child, ...]) tuple
    with a non-empty child list.  The nesting is type-checked to a fixed
    depth only; below that depth any tuple is accepted.
    """
    _typecheck(1, name, _str)
    _typecheck(2, buckets, [_str, (list, _str, 1)])
    # Finite unrolling of the recursive term specification, innermost
    # level first.
    level4 = [_str, tuple]
    level3 = [_str, (tuple, _str, (list, level4, 1))]
    level2 = [_str, (tuple, _str, (list, level3, 1))]
    level1 = [_str, (tuple, _str, (list, level2, 1))]
    _typecheck(3, terms, (list, level1))
    buckets = _listify(buckets)
    for bucketName in buckets:
        if bucketName not in _buckets:
            fatal("undeclared bucket: " + bucketName)
        if _buckets[bucketName] != "hierarchical":
            fatal("attempt to associate vocabulary with non-hierarchical " +
                  "bucket '" + bucketName + "'")
    ancestorMap = {}
    for term in terms:
        _walkGraph(name, [], term, ancestorMap)
    _vocabularies[name] = (buckets, ancestorMap)


def getVocabulary(name):
    """Return the ([bucket, ...], termAncestorMap) tuple recorded for the
    vocabulary 'name', or None if no such vocabulary has been declared."""
    _typecheck(1, name, _str)
    return _vocabularies.get(name, None)


# ----------------------------------------
# REQUIREMENTS & EXPECTATIONS

_cardinalities = ["1", "1?", "1+", "0+"]


def _declareCardinality(table, bucket, cardinality):
    """Validate 'bucket' and 'cardinality' and record the pair in 'table'."""
    _typecheck(1, bucket, _str)
    _typecheck(2, cardinality, _str)
    if bucket not in _buckets:
        fatal("undeclared bucket: " + bucket)
    if cardinality not in _cardinalities:
        fatal("bad cardinality: " + cardinality)
    table[bucket] = cardinality


# _requirements ::= { bucket : cardinality, ... }
#   bucket ::= _str, e.g., "adl:dates"
#   cardinality ::= _str, e.g., "1+"

_requirements = {}


def requirement(bucket, cardinality):
    """Require the number of mappings to 'bucket' to satisfy 'cardinality'
    ("1", "1?", "1+" or "0+")."""
    _declareCardinality(_requirements, bucket, cardinality)


# _expectations ::= { bucket : cardinality, ... }
#   bucket ::= _str, e.g., "adl:dates"
#   cardinality ::= _str, e.g., "1+"

_expectations = {}


def expectation(bucket, cardinality):
    """Expect the number of mappings to 'bucket' to satisfy 'cardinality'
    ("1", "1?", "1+" or "0+")."""
    _declareCardinality(_expectations, bucket, cardinality)


# ----------------------------------------
# PARAMETERS

# _params ::= { param : value, ... }
#   param ::= _str, e.g., "collection"
#   value ::= _str, e.g., "adl_catalog"

_params = {}
_params["collection"] = "collection"
_params["holding"] = "holding"


def getParam(param):
    """Return the value of the parameter 'param', or None if undefined."""
    _typecheck(1, param, _str)
    return _params.get(param, None)


def setParam(param, value):
    """Define (or redefine) the parameter 'param' to have value 'value'."""
    _typecheck(1, param, _str)
    _params[param] = value


# ----------------------------------------
# INPUT

_invocationLevel = 0
_source = None


def input():
    """Process the command line and read and parse the source metadata.

    Only the outermost of a series of nested calls has any effect.  The
    source is the last command line argument if it does not begin with
    "-", and standard input otherwise.
    """
    global _invocationLevel, _printTracebacks, _source
    _invocationLevel += 1
    if _invocationLevel > 1:
        return
    lastIndex = len(sys.argv) - 1
    for i in range(1, len(sys.argv)):
        arg = sys.argv[i]
        if arg == "-t":
            _printTracebacks = True
        elif arg.startswith("-D"):
            definition = arg[2:].split("=", 1)
            if len(definition) != 2:
                _usage()
            _params[definition[0]] = definition[1]
        elif arg.startswith("-"):
            _usage()
        else:
            # The source must be the final argument.
            if i != lastIndex:
                _usage()
            _source = arg
    # Any failure while reading or parsing is reported as a fatal error,
    # hence the deliberately broad exception handler.
    try:
        # NOTE(review): a filename or URL string is handed to fromStream();
        # presumably the underlying parser accepts system identifiers as
        # well as streams -- confirm against the PyXML reader.
        if _source is not None:
            _source = xml.dom.ext.reader.Sax2.Reader().fromStream(_source)
        else:
            _source = xml.dom.ext.reader.Sax2.Reader().fromStream(sys.stdin)
    except:
        _xmlException("reading/parsing the source metadata")


def getSource():
    """Return the module's current source value (the parsed document
    once input() has run, None before that)."""
    return _source


# ----------------------------------------
# NAMESPACES

_namespaces = {}


def namespace(prefix, uri):
    """Associate the namespace prefix 'prefix' with 'uri' for use in
    XPath expressions."""
    _typecheck(1, prefix, _str)
    _typecheck(2, uri, _str)
    _namespaces[prefix] = uri


# ----------------------------------------
# QUERIES

# Matches an expression that ends in an attribute selector, separating
# the expression proper from the attribute name.
_attributeSelector = re.compile(r"(.*)@([\w.:-]+)$")


def _rewriteQuery(query):
    """Return a copy of 'query' in which every absolute expression is
    followed by at least one relative expression.

    An absolute expression (one starting with "/") that has no relative
    expressions of its own is given an implicit one: "." or, if the
    expression ends in an attribute selector, ".@attribute".  It is a
    fatal error for a relative expression to precede every absolute
    expression.
    """
    newQuery = []
    context = None
    for i, expr in enumerate(query):
        if expr == "" or expr.startswith("="):
            newQuery.append(expr)
        elif expr.startswith("/"):
            # Look past any constants for the next real expression.
            j = i + 1
            while j < len(query) and query[j].startswith("="):
                j += 1
            if j == len(query) or query[j].startswith("/"):
                # This expression has no relative expressions.
                match = _attributeSelector.match(expr)
                if match:
                    newQuery.append(match.group(1))
                    newQuery.append(".@" + match.group(2))
                else:
                    newQuery.append(expr)
                    newQuery.append(".")
            else:
                newQuery.append(expr)
            context = expr
        else:
            if context is None:
                fatal("relative XPath expression has no absolute " +
                      "contextual expression: " + expr)
            newQuery.append(expr)
    return newQuery


def _relativeNodeLocation(contextExpr, i, n, expr, j, m):
    """Return the common prefix of the error messages that describe a
    problem with relative node j+1 of m under context node i+1 of n."""
    return ("error evaluating XPath expression '" + expr +
            "' relative to contextual expression '" + contextExpr +
            "', while processing context node " + str(i+1) + " of " +
            str(n) + ", relative node " + str(j+1) + " of " + str(m) +
            ": node produced by relative expression ")


def _getValue(contextExpr, i, n, expr, j, m, attribute, node):
    """Return the textual value of 'node', or None if it is empty.

    If 'attribute' is None the value is the stripped text of the node's
    only child, which must be a text node; otherwise it is the stripped
    value of the named attribute.  The remaining arguments are used only
    to construct error messages.
    """
    if attribute is None:
        children = node.childNodes
        if len(children) == 0:
            return None
        elif len(children) == 1:
            child = children[0]
            if child.nodeType != xml.dom.Node.TEXT_NODE:
                fatal(_relativeNodeLocation(contextExpr, i, n, expr, j, m) +
                      "does not contain text only")
            text = child.nodeValue.strip()
        else:
            fatal(_relativeNodeLocation(contextExpr, i, n, expr, j, m) +
                  "has more than one child node")
            return None
    else:
        text = node.getAttribute(attribute).strip()
    if text == "":
        return None
    return text


# Matches the pseudo-function join(expression, 'separator'), where the
# separator may be quoted with either single or double quotes.
_joinSelector = re.compile(
    " *join *\\( *(.*?) *, *('[^']*'|\"[^\"]*\") *\\) *$")


def _getRelative(contextExpr, context, expr):
    """Evaluate the relative expression 'expr' against each node in
    'context' and return a list holding one value (or None) per node.

    'expr' may be wrapped in the join pseudo-function, in which case the
    values of all nodes selected under a context node are joined with the
    given separator; otherwise selecting more than one node is a fatal
    error.  'expr' may also end in an attribute selector.  'contextExpr'
    is used only to construct error messages.
    """
    match = _joinSelector.match(expr)
    if match:
        expr = match.group(1)
        joinString = match.group(2)[1:-1]    # strip the quotes
    else:
        joinString = None
    match = _attributeSelector.match(expr)
    if match:
        expr = match.group(1)
        attribute = match.group(2)
    else:
        attribute = None
    values = []
    n = len(context)
    for i in range(n):
        try:
            nodes = xml.xpath.Evaluate(expr,
                context=xml.xpath.Context.Context(context[i],
                processorNss=_namespaces))
        except:
            _xmlException("evaluating XPath expression '" + expr +
                "' relative to contextual expression '" + contextExpr +
                "', while processing context node " + str(i+1) + " of " +
                str(n))
        if len(nodes) == 0:
            values.append(None)
        elif len(nodes) == 1 or joinString is not None:
            vals = []
            for j in range(len(nodes)):
                v = _getValue(contextExpr, i, n, expr, j, len(nodes),
                              attribute, nodes[j])
                if v is not None:
                    vals.append(v)
            if len(vals) == 0:
                values.append(None)
            elif joinString is None:
                values.append(vals[0])
            else:
                values.append(joinString.join(vals))
        else:
            fatal("error evaluating XPath expression '" + expr +
                "' relative to contextual expression '" + contextExpr +
                "', while processing context node " + str(i+1) + " of " +
                str(n) + ": relative expression produced more " +
                "than one DOM node")
    return values


# In the following:
#
#   results ::= [column, ...]
#     column ::= [value, ...]
#       value ::= _str
#
#   values ::= [row, ...]
#     row ::= (value, ...)
def get(query):
    """Evaluate 'query' against the source metadata and return the
    resulting rows as a list of tuples.

    'query' is an expression or a non-empty list of expressions.  Each
    expression is the empty string (a null constant), "=" followed by a
    constant value, an absolute XPath expression starting with "/" (which
    selects context nodes but contributes no column), or an expression
    evaluated relative to the most recent absolute expression.  All
    absolute expressions in a query must select the same number of
    nodes.  If the query has any non-constant column, rows in which
    every non-constant column is null are omitted.
    """
    _typecheck(1, query, [_str, (list, _str, 1)])
    query = _rewriteQuery(_listify(query))
    results = []
    isConstant = []
    length = -1
    for expr in query:
        if expr == "":
            results.append([None])
            isConstant.append(True)
        elif expr.startswith("="):
            constant = expr[1:].strip()
            if constant == "":
                results.append([None])
            else:
                results.append([constant])
            isConstant.append(True)
        elif expr.startswith("/"):
            try:
                context = xml.xpath.Evaluate(expr,
                    context=xml.xpath.Context.Context(_source,
                    processorNss=_namespaces))
            except:
                _xmlException("evaluating XPath expression '" + expr + "'")
            if length >= 0 and len(context) != length:
                fatal("incommensurable column lengths in query: " +
                      "expression '" + contextExpr + "' produced " +
                      str(length) + " value" + _plural(length) + " while " +
                      "expression '" + expr + "' produced " +
                      str(len(context)) + " value" + _plural(len(context)))
            contextExpr = expr
            length = len(context)
        else:
            results.append(_getRelative(contextExpr, context, expr))
            isConstant.append(False)
    if length < 0:
        # The query consists of constants only; it yields a single row.
        length = 1
    allConstant = False not in isConstant
    values = []
    for i in range(length):
        # If there are any non-constant columns in the query, each row
        # must have at least one non-null value from a non-constant
        # column.
        row = []
        hasNonNullColumn = False
        for column, constant in zip(results, isConstant):
            # Single-valued columns are replicated across all rows.
            if len(column) == 1:
                v = column[0]
            else:
                v = column[i]
            row.append(v)
            if v is not None and not constant:
                hasNonNullColumn = True
        if hasNonNullColumn or allConstant:
            values.append(tuple(row))
    return values


# ----------------------------------------
# MAPPING DECLARATIONS

# _mapdecls ::= [decl, ...]
#   decl ::= (bucket, query, field, prefilters, converters, postfilters,
#            strict, id)
#     bucket ::= _str, e.g., "adl:dates"
#     query ::= [expr, ...]
#       expr ::= _str, e.g., "/metadata/title"
#     field ::= (fieldName, fieldUri) or None
#       fieldName ::= _str, e.g., "[DC] Title"
#       fieldUri ::= _str, e.g., "http://purl.org/dc/elements/1.1/title"
#     prefilters ::= [filter, ...]
#     converters ::= [converter, ...]
#     postfilters ::= [filter, ...]
#       filter ::= func(value) -> retval
#       converter ::= func(value) -> retval
#         value ::= tuple
#         retval ::= value or None
#     strict ::= bool
#     id ::= int or None

_mapdecls = []

# Type specifications shared by the declaration functions below.
_fieldSpec = [(tuple, _str, _str), types.NoneType]
_idSpec = [int, types.NoneType]
_functionsSpec = [types.FunctionType, (list, types.FunctionType)]


def _declMatches(decl, bucket, field, id):
    """Return True if the mapping declaration 'decl' belongs to 'bucket'
    and also matches 'field' and 'id'; a value of None for either of the
    latter two matches anything."""
    return (decl[0] == bucket and
            (field is None or field == decl[2]) and
            (id is None or id == decl[7]))


def map(bucket, query, field=None, prefilters=[], converters=[],
        postfilters=[], strict=True, id=None):
    """Declare a mapping of the values produced by 'query' to 'bucket'.

    'prefilters', 'converters' and 'postfilters' may each be a single
    function or a list of functions.  If 'id' is given it must not
    duplicate the ID of an existing mapping to the same bucket.
    """
    # The list defaults are never mutated: _listify() copies every list
    # before it is stored.
    _typecheck(1, bucket, _str)
    _typecheck(2, query, [_str, (list, _str, 1)])
    _typecheck(3, field, _fieldSpec)
    _typecheck(4, prefilters, _functionsSpec)
    _typecheck(5, converters, _functionsSpec)
    _typecheck(6, postfilters, _functionsSpec)
    _typecheck(7, strict, bool)
    _typecheck(8, id, _idSpec)
    if bucket not in _buckets:
        fatal("undeclared bucket: " + bucket)
    if id is not None:
        for d in _mapdecls:
            if d[0] == bucket and d[7] == id:
                fatal("duplicate mapping ID for bucket '" + bucket + "': " +
                      str(id))
    _mapdecls.append((bucket, _listify(query), field, _listify(prefilters),
                      _listify(converters), _listify(postfilters), strict,
                      id))


def _addFilterOrConverter(bucket, filter, field, id, index, append):
    """Add the function 'filter' to element 'index' (3 = prefilters,
    4 = converters, 5 = postfilters) of every matching mapping
    declaration, at the end if 'append' is true and at the front
    otherwise."""
    _typecheck(1, bucket, _str)
    _typecheck(2, filter, types.FunctionType)
    _typecheck(3, field, _fieldSpec)
    _typecheck(4, id, _idSpec)
    if bucket not in _buckets:
        fatal("undeclared bucket: " + bucket)
    for d in _mapdecls:
        if _declMatches(d, bucket, field, id):
            if append:
                d[index].append(filter)
            else:
                d[index].insert(0, filter)


def prependPrefilter(bucket, filter, field=None, id=None):
    """Insert 'filter' before the prefilters of the matching mappings."""
    _addFilterOrConverter(bucket, filter, field, id, 3, False)


def appendPrefilter(bucket, filter, field=None, id=None):
    """Add 'filter' after the prefilters of the matching mappings."""
    _addFilterOrConverter(bucket, filter, field, id, 3, True)


def prependConverter(bucket, converter, field=None, id=None):
    """Insert 'converter' before the converters of the matching mappings."""
    _addFilterOrConverter(bucket, converter, field, id, 4, False)


def appendConverter(bucket, converter, field=None, id=None):
    """Add 'converter' after the converters of the matching mappings."""
    _addFilterOrConverter(bucket, converter, field, id, 4, True)


def prependPostfilter(bucket, filter, field=None, id=None):
    """Insert 'filter' before the postfilters of the matching mappings."""
    _addFilterOrConverter(bucket, filter, field, id, 5, False)


def appendPostfilter(bucket, filter, field=None, id=None):
    """Add 'filter' after the postfilters of the matching mappings."""
    _addFilterOrConverter(bucket, filter, field, id, 5, True)


def strict(bucket, newStrict, field=None, id=None):
    """Set the strictness of every matching mapping declaration to
    'newStrict'."""
    _typecheck(1, bucket, _str)
    _typecheck(2, newStrict, bool)
    _typecheck(3, field, _fieldSpec)
    _typecheck(4, id, _idSpec)
    if bucket not in _buckets:
        fatal("undeclared bucket: " + bucket)
    for i, d in enumerate(_mapdecls):
        if _declMatches(d, bucket, field, id):
            # Declarations are tuples, so a modified copy is substituted.
            _mapdecls[i] = d[:6] + (newStrict, d[7])


def unmap(bucket, field=None, id=None):
    """Remove every matching mapping declaration."""
    _typecheck(1, bucket, _str)
    _typecheck(2, field, _fieldSpec)
    _typecheck(3, id, _idSpec)
    if bucket not in _buckets:
        fatal("undeclared bucket: " + bucket)
    _mapdecls[:] = [d for d in _mapdecls
                    if not _declMatches(d, bucket, field, id)]


# ----------------------------------------
# PROCESSING & VALIDATION

# _mappings ::= { bucket : [mapping, ...], ... }
#   bucket ::= _str, e.g., "adl:dates"
#   mapping ::= (field, value)
#     field ::= (fieldName, fieldUri) or None
#       fieldName ::= _str, e.g., "[DC] Title"
#       fieldUri ::= _str, e.g., "http://purl.org/dc/elements/1.1/title"
#     value ::= tuple

_mappings = {}

_valueSpec = [tuple, types.NoneType]


def _process1(bucket, query, field, prefilters, converters, postfilters,
              strict):
    """Evaluate one mapping declaration and add the resulting mappings
    to those already recorded for 'bucket'.

    Each value produced by the query is passed through every prefilter,
    then replaced by the result of the first converter that returns a
    value, then passed through every postfilter, and finally validated.
    A filter or validator returning None discards the value.
    """
    validator = _bucketTypes[_buckets[bucket]][0]
    mappings = _mappings.get(bucket, [])
    for value in get(query):
        for prefilter in prefilters:
            value = prefilter(value)
            _typecheck(0, value, _valueSpec)
            if value is None:
                break
        if value is None:
            continue
        for converter in converters:
            converted = converter(value)
            _typecheck(0, converted, _valueSpec)
            if converted is not None:
                value = converted
                break
        for postfilter in postfilters:
            value = postfilter(value)
            _typecheck(0, value, _valueSpec)
            if value is None:
                break
        if value is None:
            continue
        mapping = validator(bucket, field, value, strict)
        _typecheck(0, mapping, [(tuple, _fieldSpec, tuple), types.NoneType])
        if mapping is not None:
            mappings.append(mapping)
    _mappings[bucket] = mappings


def _process():
    """Evaluate all mapping declarations, in declaration order."""
    for d in _mapdecls:
        _process1(d[0], d[1], d[2], d[3], d[4], d[5], d[6])


# ----------------------------------------
# ADDITIONAL CHECKS

def _cardinalityViolated(cardinality, n):
    """Return True if a count of 'n' mappings does not satisfy
    'cardinality'.  The cardinality "0+" is satisfied by any count."""
    return ((cardinality == "1" and n != 1) or
            (cardinality == "1?" and n > 1) or
            (cardinality == "1+" and n == 0))


def _checkRequirements():
    """Report a fatal error for the first unsatisfied requirement found."""
    for bucket, cardinality in _requirements.items():
        n = len(_mappings.get(bucket, []))
        if _cardinalityViolated(cardinality, n):
            fatal("mapping requirement not satisfied: required cardinality " +
                  "for bucket '%s' is '%s', got %s mappings" %
                  (bucket, cardinality, n))


def _checkExpectations():
    """Issue a warning for each expectation that is not met."""
    for bucket, cardinality in _expectations.items():
        n = len(_mappings.get(bucket, []))
        if _cardinalityViolated(cardinality, n):
            warning("mapping expectation not met: expected cardinality " +
                    "for bucket '%s' is '%s', got %s mappings" %
                    (bucket, cardinality, n))


# ----------------------------------------
# POST-PROCESSING

# _consolidatedTextualBuckets ::= { bucket : separator, ... }
#   bucket ::= _str, e.g., "adl:assigned-terms"
#   separator ::= _str, e.g., "; "

_consolidatedTextualBuckets = {}


def consolidateTextualValues(buckets, separator="; "):
    """Request that the values mapped to each of 'buckets' from the same
    field be joined with 'separator' into a single value.  A separator
    of None cancels any such request.  Every bucket must be a declared
    bucket of type "textual"."""
    _typecheck(1, buckets, [_str, (list, _str, 1)])
    _typecheck(2, separator, [_str, types.NoneType])
    for bucketName in _listify(buckets):
        if bucketName not in _buckets:
            fatal("undeclared bucket: " + bucketName)
        if _buckets[bucketName] != "textual":
            fatal("attempt to consolidate non-textual bucket '" +
                  bucketName + "'")
        if separator is not None:
            _consolidatedTextualBuckets[bucketName] = separator
        elif bucketName in _consolidatedTextualBuckets:
            del _consolidatedTextualBuckets[bucketName]


def _consolidateTextualMappings():
    """Join, per field, the first elements of the values mapped to each
    bucket for which consolidation has been requested."""
    for bucketName in _mappings:
        if bucketName not in _consolidatedTextualBuckets:
            continue
        separator = _consolidatedTextualBuckets[bucketName]
        joined = {}
        for field, value in _mappings[bucketName]:
            if field in joined:
                joined[field] = joined[field] + separator + value[0]
            else:
                joined[field] = value[0]
        # Only the values of existing keys are replaced, so the dictionary
        # may safely be updated while it is being iterated over.
        _mappings[bucketName] = [(field, (text,))
                                 for field, text in joined.items()]


def _consolidateHierarchicalMappings1(mappings):
    """Remove from the list 'mappings', in place, every mapping whose
    value is a duplicate of an earlier one or is an ancestor of another
    mapped value.  Values are indexed as (vocabulary, term, ...)."""
    # We need to use a two-pass algorithm so that ancestor mappings
    # will be weeded out regardless of the order of the mappings.
    seen = {}
    for (field, value) in mappings:
        if value not in seen:
            seen[value] = "primary"
        # NOTE(review): the ancestors are assumed to be marked for every
        # mapping, not only for values seen for the first time; confirm
        # against the upstream file.
        for ancestor in _vocabularies[value[0]][1][value[1]]:
            seen[(value[0], ancestor)] = "implied"
    kept = []
    for mapping in mappings:
        value = mapping[1]
        if seen[value] == "primary":
            # Later occurrences of the same value are duplicates.
            seen[value] = "seen"
            kept.append(mapping)
    mappings[:] = kept


def _consolidateHierarchicalMappings():
    """Consolidate the mappings of every hierarchical bucket."""
    for bucketName, mappings in _mappings.items():
        if _buckets[bucketName] == "hierarchical":
            _consolidateHierarchicalMappings1(mappings)


# ----------------------------------------
# OUTPUT

_dtd = "http://www.alexandria.ucsb.edu/middleware/dtds/ADL-bucket-report.dtd"


def output():
    """Process the mapping declarations and print the resulting bucket
    report.  Only the call that balances the outermost call to input()
    has any effect."""
    global _invocationLevel
    _invocationLevel -= 1
    if _invocationLevel > 0:
        return
    _process()
    _checkRequirements()
    _checkExpectations()
    _consolidateTextualMappings()
    _consolidateHierarchicalMappings()
    imp = xml.dom.DOMImplementation.getDOMImplementation()
    document = imp.createDocument(None, "ADL-bucket-report",
        imp.createDocumentType("ADL-bucket-report", None, _dtd))
    root = document.documentElement
    e = document.createElement("identifier")
    e.appendChild(document.createTextNode(_params["collection"] + ":" +
                                          _params["holding"]))
    root.appendChild(e)
    for bucketName in _mappings:
        # Buckets without mappings are left out of the report.
        if len(_mappings[bucketName]) > 0:
            e = document.createElement("bucket")
            e.setAttribute("name", bucketName)
            encoder = _bucketTypes[_buckets[bucketName]][1]
            for field, value in _mappings[bucketName]:
                e.appendChild(encoder(document, field, value))
            root.appendChild(e)
    xml.dom.ext.PrettyPrint(document)


# ----------------------------------------
# SYNTACTIC SUGAR

def present(query):
    """Return True if 'query' produces at least one row."""
    _typecheck(1, query, [_str, (list, _str, 1)])
    return len(get(query)) > 0


def mapConstant(bucket, value, field=None, id=None):
    """Declare a mapping of the constant 'value' (a string or a tuple of
    strings) to 'bucket'."""
    _typecheck(1, bucket, _str)
    _typecheck(2, value, [_str, tuple])
    _typecheck(3, field, _fieldSpec)
    _typecheck(4, id, _idSpec)
    if type(value) is not tuple:
        value = (value,)
    map(bucket, ["=" + v for v in value], field, id=id)


# The following import must appear last to allow the bucket type
# modules to see the definitions in this module.

import bucket_types