############################################################################### ## ## ## ALEXANDRIA DIGITAL LIBRARY ## ## University of California at Santa Barbara ## ## ## ## ------------------------------------------------------------------------- ## ## ## ## Copyright (c) 2007 by the Regents of the University of California ## ## All rights reserved ## ## ## ## Redistribution and use in source and binary forms, with or without ## ## modification, are permitted provided that the following conditions are ## ## met: ## ## ## ## 1. Redistributions of source code must retain the above copyright ## ## notice, this list of conditions, and the following disclaimer. ## ## ## ## 2. Redistributions in binary form must reproduce the above copyright ## ## notice, this list of conditions, and the following disclaimer in ## ## the documentation and/or other materials provided with the ## ## distribution. ## ## ## ## 3. Neither the name of the University nor the names of its ## ## contributors may be used to endorse or promote products derived ## ## from this software without specific prior written permission. ## ## ## ## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY ## ## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ## ## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE ## ## DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ## ## ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ## ## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ## ## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ## ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ## ## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ## ## ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ## ## POSSIBILITY OF SUCH DAMAGE. 
##                                                                           ##
###############################################################################

# $Header: /export/home/gjanee/mm/RCS/MODS_mapping.py,v 1.1 2007/02/07 18:54:34 gjanee Exp $

# DESCRIPTION
#
# Maps the Metadata Object Description Schema (MODS), version 3.2, to
# the ADL bucket metadata view.  This mapping is largely based on the
# MODS to Dublin Core Metadata Element Set Mapping, version 3.0.
#
# AUTHOR
#
# Greg Janee
# gjanee@alexandria.ucsb.edu
#
# HISTORY
#
# $Log: MODS_mapping.py,v $
# Revision 1.1  2007/02/07 18:54:34  gjanee
# Initial revision
#

import re

from ADL_mapper import *

# NOTE(review): this is not the builtin prompt -- the star import above
# presumably rebinds 'input' (and 'map', used throughout this file) to
# ADL_mapper functions; confirm against ADL_mapper.
input()

import ADL_buckets

namespace("M", "http://www.loc.gov/mods/v3")


# Builds the two-element source-field descriptor passed as the third
# argument of every map() call in this file: a "[MODS] "-prefixed
# label and the MODS namespace path for field 'f'.
def field(f):
    label = "[MODS] " + f
    path = "http://www.loc.gov/mods/v3:/mods/" + f
    return (label, path)


# Translates a source term into a controlled-vocabulary term.  'v' is
# a sequence whose first element is the term string; the lookup is
# done on the lowercased term, so the keys of 'mapping' must be
# lowercase.  Returns the pair (vocabulary, mapped term), or None if
# the term has no entry in 'mapping'.
def mapTerm(vocabulary, mapping, v):
    key = v[0].lower()
    if key not in mapping:
        return None
    return (vocabulary, mapping[key])


# ----------------------------------------
# ADL:TITLES

# We map just the "main" title, i.e., the title that is not qualified
# as being abbreviated, translated, etc.  All components of the main
# title are concatenated into a single value.

map("adl:titles",
    ["/M:mods/M:titleInfo[not(@type)]", "join(*, ' ')"],
    field("titleInfo"), id=1)

# ----------------------------------------
# ADL:GEOGRAPHIC-LOCATIONS

# We map cartographic coordinates.
#
# MODS doesn't specify any particular syntax for expressing
# coordinates, so the following mapping is necessarily incomplete as
# it requires a converter or filter function to convert coordinate
# strings to coordinate tuples.
#
# Note: per MODS, each <coordinates> element defines a single point;
# multiple <coordinates> elements within a <cartographics> element
# together define the vertices of a line or polygon.  However, none of
# the MODS examples follow this interpretation.  If some source
# metadata is ever encountered that does, then <coordinates> elements
# will need to be grouped and mapped in toto.
map("adl:geographic-locations",
    "/M:mods/M:subject/M:cartographics/M:coordinates",
    field("subject/cartographics/coordinates"), id=2)

# ----------------------------------------
# ADL:DATES

# We map temporal subjects.  The first mapping below maps single
# dates; the second pairs start and end dates (that are adjacent in
# the source metadata) and maps them to date ranges.
#
# The 'encoding' attribute is ignored in all cases; the date syntax
# (W3CDTF or ISO8601) is simply determined by observation.  If other
# date formats or unstructured dates are encountered, it may be
# necessary to add additional converters or relax the strictness of
# the mappings.
#
# The 'keyDate' attribute is also ignored.


# Converts a compact date to hyphenated form.  'v' is a sequence whose
# first element is the date string.  A string of the form YYYY, YYYYMM
# or YYYYMMDD yields the 1-tuple ("YYYY",), ("YYYY-MM",) or
# ("YYYY-MM-DD",); anything else yields None.
def isoDate(v):
    # Raw string: "\d" in a plain literal is an invalid escape sequence
    # (a SyntaxWarning on recent Python versions).
    m = re.match(r"(\d\d\d\d)(\d\d)?(\d\d)?$", v[0])
    if m is None:
        return None
    # Unmatched optional groups are None; keep only the parts present.
    parts = [g for g in m.groups() if g is not None]
    return ("-".join(parts),)


map("adl:dates",
    "/M:mods/M:subject/M:temporal[not(@point)]",
    field("subject/temporal"),
    converters=isoDate, id=3)


# Like isoDate, but for (date, point) values: converts the date and
# carries the second element (the 'point' attribute value) through
# unchanged.  Returns None if the date does not convert.
def pairedIsoDate(v):
    d = isoDate(v[:1])
    if d is None:
        return None
    return (d[0], v[1])


# Holds the most recent (date, point) value that is still waiting for
# its partner; None when nothing is pending.
dateCache = None


# Combines two consecutive (date, point) values into one date range.
# The first value of a pair is cached and None is returned; on the
# second value, a "start"/"end" combination (in either order) yields
# the tuple (start date, end date).  Any other combination yields None
# and both values are discarded.
#
# NOTE(review): dateCache is module-level state, so an unpaired value
# stays pending until the next call; whether that can carry over from
# one record to the next depends on how ADL_mapper invokes postfilters
# -- confirm.
def pairDates(v):
    global dateCache
    if dateCache is None:
        dateCache = v
        return None
    if v[1] == "start" and dateCache[1] == "end":
        r = (v[0], dateCache[0])
    elif v[1] == "end" and dateCache[1] == "start":
        r = (dateCache[0], v[0])
    else:
        r = None
    dateCache = None
    return r


map("adl:dates",
    ["/M:mods/M:subject/M:temporal[@point]", ".", ".@point"],
    field("subject/temporal"),
    converters=pairedIsoDate, postfilters=pairDates, id=4)

# ----------------------------------------
# ADL:TYPES

# Here are two basic mappings, the first from type of resource, the
# second from genre.  The latter mapping implements just selected
# terms from the MARC Value List for Genre Terms.
# MODS typeOfResource value (lowercase) -> ADL Object Type Thesaurus term.
typeMap = {
    "cartographic": "cartographic works",
    "still image": "images",
}

map("adl:types",
    "/M:mods/M:typeOfResource",
    field("typeOfResource"),
    prefilters=lambda v: mapTerm("ADL Object Type Thesaurus", typeMap, v),
    id=5)

# MARC genre term (lowercase) -> ADL Object Type Thesaurus term.
marcgtMap = {
    "atlas": "maps",
    "map": "maps",
    "picture": "images",
    "remote sensing image": "remote-sensing images",
}

map("adl:types",
    "/M:mods/M:genre[@authority='marcgt']",
    field("genre"),
    prefilters=lambda v: mapTerm("ADL Object Type Thesaurus", marcgtMap, v),
    id=6)

# ----------------------------------------
# ADL:FORMATS

# We map physical description form.  Note that the following mapping
# from the MARC Value List for Form omits the 'electronic' term, since
# that term by itself does not hint at the most fundamental
# distinction we need to make: whether the item is online or offline.

marcformMap = {
    "braille": "Paper",
    "microfiche": "Film",
    "microfilm": "Film",
    "print": "Paper",
    "large print": "Paper",
}

map("adl:formats",
    "/M:mods/M:physicalDescription/M:form[@authority='marcform']",
    field("physicalDescription/form"),
    prefilters=lambda v: mapTerm("ADL Object Formats", marcformMap, v),
    id=7)

# We also map MIME types, but only if a download URL is present, from
# which we infer that the item is online.

mimeMap = {
    "image/tiff": "TIFF",
}

# NOTE(review): the source layout was lost; both statements below are
# taken to be conditional on the URL, per the comment above -- confirm.
if present("/M:mods/M:location/M:url"):
    mapConstant("adl:formats", ("ADL Object Formats", "Online"), id=8)
    map("adl:formats",
        "/M:mods/M:physicalDescription/M:internetMediaType",
        field("physicalDescription/internetMediaType"),
        prefilters=lambda v: mapTerm("ADL Object Formats", mimeMap, v),
        id=9)

# ----------------------------------------
# ADL:SUBJECT-RELATED-TEXT

# We map abstracts, notes, topics, and geographic names.
map("adl:subject-related-text",
    "/M:mods/M:abstract",
    field("abstract"), id=10)

map("adl:subject-related-text",
    "/M:mods/M:note",
    field("note"), id=11)

map("adl:subject-related-text",
    "/M:mods/M:subject/M:topic",
    field("subject/topic"), id=12)

map("adl:subject-related-text",
    "/M:mods/M:subject/M:geographic",
    field("subject/geographic"), id=13)

# Hierarchical geographic names are concatenated and separated by
# commas.

map("adl:subject-related-text",
    ["/M:mods/M:subject/M:hierarchicalGeographic", "join(*, ', ')"],
    field("subject/hierarchicalGeographic"), id=14)

# ----------------------------------------
# ADL:ORIGINATORS

# We map all names, regardless of role.  In mapping a name, all name
# parts and affiliations are concatenated into a single value; display
# forms, descriptions, and roles are omitted.

map("adl:originators",
    ["/M:mods/M:name",
     "join(*[self::M:namePart or self::M:affiliation], ' ')"],
    field("name"), id=15)

# And we map all publishers.

map("adl:originators",
    "/M:mods/M:originInfo/M:publisher",
    field("originInfo/publisher"), id=16)

# ----------------------------------------
# ADL:IDENTIFIERS

# We map all qualified, non-local, valid identifiers.

map("adl:identifiers",
    ["/M:mods/M:identifier[@type and @type != 'local' and not(@invalid)]",
     ".", ".@type"],
    field("identifier"), id=17)

# ----------------------------------------
# REQUIREMENTS & EXPECTATIONS

# At least one title must be mapped.
requirement("adl:titles", "1+")

output()