###############################################################################
## ##
## ALEXANDRIA DIGITAL LIBRARY ##
## University of California at Santa Barbara ##
## ##
## ------------------------------------------------------------------------- ##
## ##
## Copyright (c) 2007 by the Regents of the University of California ##
## All rights reserved ##
## ##
## Redistribution and use in source and binary forms, with or without ##
## modification, are permitted provided that the following conditions are ##
## met: ##
## ##
## 1. Redistributions of source code must retain the above copyright ##
## notice, this list of conditions, and the following disclaimer. ##
## ##
## 2. Redistributions in binary form must reproduce the above copyright ##
## notice, this list of conditions, and the following disclaimer in ##
## the documentation and/or other materials provided with the ##
## distribution. ##
## ##
## 3. Neither the name of the University nor the names of its ##
## contributors may be used to endorse or promote products derived ##
## from this software without specific prior written permission. ##
## ##
## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY ##
## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ##
## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE ##
## DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ##
## ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ##
## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ##
## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ##
## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ##
## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ##
## ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ##
## POSSIBILITY OF SUCH DAMAGE. ##
## ##
###############################################################################
# $Header: /export/home/gjanee/mm/RCS/MODS_mapping.py,v 1.1 2007/02/07 18:54:34 gjanee Exp $
# DESCRIPTION
#
# Maps the Metadata Object Description Schema (MODS), version 3.2,
# to the ADL bucket metadata view. This mapping is largely based on
# the MODS to Dublin Core Metadata Element Set Mapping, version 3.0.
#
# AUTHOR
#
# Greg Janee
# gjanee@alexandria.ucsb.edu
#
# HISTORY
#
# $Log: MODS_mapping.py,v $
# Revision 1.1 2007/02/07 18:54:34 gjanee
# Initial revision
#
import re
from ADL_mapper import *
# NOTE(review): presumably this is the input() supplied by the
# "from ADL_mapper import *" line rather than the Python builtin --
# confirm against ADL_mapper.
input()
import ADL_buckets
# Declare the prefix "M" for the MODS version 3 namespace URI; the
# "M:" steps in the paths used by the mappings below rely on it.
namespace("M", "http://www.loc.gov/mods/v3")
def field(f):
    """Describe a MODS source field.

    f -- path of the field relative to the <mods> root element, e.g.
         "subject/topic".

    Returns a 2-tuple: a display label ("[MODS] " followed by f) and
    an identifier formed from the MODS namespace URI, ":/mods/", and f.
    """
    label = "[MODS] " + f
    identifier = "http://www.loc.gov/mods/v3:/mods/" + f
    return (label, identifier)
def mapTerm(vocabulary, mapping, v):
    """Translate a source term through a lookup table.

    vocabulary -- name of the target vocabulary; returned unchanged.
    mapping    -- dictionary keyed by lowercase source terms.
    v          -- a sequence whose first item is the source term; the
                  lookup is case-insensitive on that item.

    Returns (vocabulary, mapped term), or None if the term has no
    entry in mapping.
    """
    key = v[0].lower()
    if key not in mapping:
        return None
    return (vocabulary, mapping[key])
# ----------------------------------------
# ADL:TITLES
# We map just the "main" title, i.e., the title that is not qualified
# as being abbreviated, translated, etc. All components of the main
# title are concatenated into a single value.
map("adl:titles",
    # First path: <titleInfo> elements that carry no 'type' attribute.
    # Second expression: presumably evaluated by ADL_mapper against
    # each selected element, joining its children with single spaces.
    ["/M:mods/M:titleInfo[not(@type)]", "join(*, ' ')"],
    # Label/identifier pair naming the source field.
    field("titleInfo"),
    id=1)
# ----------------------------------------
# ADL:GEOGRAPHIC-LOCATIONS
# We map cartographic coordinates.
#
# MODS doesn't specify any particular syntax for expressing
# coordinates, so the following mapping is necessarily incomplete as
# it requires a converter or filter function to convert coordinate
# strings to coordinate tuples.
#
# Note: per MODS, each <coordinates> element defines a single point;
# multiple <coordinates> elements within a <cartographics> element
# together define the vertices of a line or polygon. However, none of
# the MODS examples follow this interpretation. If some source
# metadata is ever encountered that does, then <coordinates> elements
# will need to be grouped and mapped in toto.
# No converters or filters are passed to this mapping, so nothing here
# parses the coordinate string; one must be added once the coordinate
# syntax used by the source metadata is known.
map("adl:geographic-locations",
    "/M:mods/M:subject/M:cartographics/M:coordinates",
    field("subject/cartographics/coordinates"),
    id=2)
# ----------------------------------------
# ADL:DATES
# We map temporal subjects. The first mapping below maps single
# dates; the second pairs start and end dates (that are adjacent in
# the source metadata) and maps them to date ranges.
#
# The 'encoding' attribute is ignored in all cases; the date syntax
# (W3CDTF or ISO8601) is simply determined by observation. If other
# date formats or unstructured dates are encountered, it may be
# necessary to add additional converters or relax the strictness of
# the mappings.
#
# The 'keyDate' attribute is also ignored.
def isoDate(v):
    """Convert a compact ISO 8601 date to hyphenated form.

    v -- a sequence whose first item is the date string; only v[0]
         is examined.

    Accepts "YYYY", "YYYYMM", or "YYYYMMDD" and returns a 1-tuple
    holding "YYYY", "YYYY-MM", or "YYYY-MM-DD" respectively.  Returns
    None if v[0] has any other form.
    """
    # Raw string: "\d" in a plain string literal is an invalid escape
    # sequence that newer Python versions warn about.
    m = re.match(r"(\d\d\d\d)(\d\d)?(\d\d)?$", v[0])
    if m is None:
        return None
    # Optional groups that did not participate are None; keep only the
    # components that were actually present.
    return ("-".join([part for part in m.groups() if part is not None]),)
# Single dates: <temporal> elements that have no 'point' attribute.
# isoDate rewrites compact YYYY[MM[DD]] values in hyphenated form and
# returns None for anything else.
# NOTE(review): presumably ADL_mapper treats a None from a converter
# as "no value" -- confirm.
map("adl:dates",
    "/M:mods/M:subject/M:temporal[not(@point)]",
    field("subject/temporal"),
    converters=isoDate,
    id=3)
def pairedIsoDate(v):
    """Apply isoDate to the date half of a (date, point) value.

    v -- a sequence whose first item is a date string and whose second
         item is carried through untouched.

    Returns (converted date, v[1]), or None if isoDate rejects the
    date.
    """
    converted = isoDate(v[:1])
    if converted is None:
        return None
    return (converted[0], v[1])
# Holds the first (date, point) value of a pair until its partner
# arrives; None when no value is waiting.
dateCache = None


def pairDates(v):
    """Combine two consecutive (date, point) values into a date range.

    v -- a (date, point) sequence.

    The first value of each pair is stored in the module-level
    dateCache and None is returned.  On the second value the cache is
    cleared and, if one value is marked "start" and the other "end",
    the tuple (start date, end date) is returned; any other
    combination yields None and both values are discarded.
    """
    global dateCache
    pending = dateCache
    if pending is None:
        dateCache = v
        return None
    # Order of the two point markers: (earlier value, current value).
    points = (pending[1], v[1])
    dateCache = None
    if points == ("end", "start"):
        return (v[0], pending[0])
    if points == ("start", "end"):
        return (pending[0], v[0])
    return None
# Date ranges: <temporal> elements that carry a 'point' attribute.
# Each value handed to the converter is a two-item sequence built from
# "." and ".@point" (presumably the element's text and its 'point'
# attribute).  pairedIsoDate normalizes the date item; pairDates then
# merges an adjacent start/end pair into one (start, end) value.
map("adl:dates",
    ["/M:mods/M:subject/M:temporal[@point]", ".", ".@point"],
    field("subject/temporal"),
    converters=pairedIsoDate,
    postfilters=pairDates,
    id=4)
# ----------------------------------------
# ADL:TYPES
# Here are two basic mappings, the first from type of resource, the
# second from genre. The latter mapping implements just selected
# terms from the MARC Value List for Genre Terms.
# Lookup table from <typeOfResource> values to ADL Object Type
# Thesaurus terms.  Keys are lowercase because mapTerm lowercases the
# source value before the lookup.
typeMap = {
    "cartographic" : "cartographic works",
    "still image" : "images" }
map("adl:types",
    "/M:mods/M:typeOfResource",
    field("typeOfResource"),
    # mapTerm returns None for values that have no entry in typeMap.
    prefilters=lambda v: mapTerm("ADL Object Type Thesaurus", typeMap, v),
    id=5)
# Lookup table from selected MARC genre terms to ADL Object Type
# Thesaurus terms.  Keys are lowercase because mapTerm lowercases the
# source value before the lookup.
marcgtMap = {
    "atlas" : "maps",
    "map" : "maps",
    "picture" : "images",
    "remote sensing image" : "remote-sensing images" }
map("adl:types",
    # Only <genre> elements whose authority is 'marcgt' are considered.
    "/M:mods/M:genre[@authority='marcgt']",
    field("genre"),
    # mapTerm returns None for values that have no entry in marcgtMap.
    prefilters=lambda v: mapTerm("ADL Object Type Thesaurus", marcgtMap, v),
    id=6)
# ----------------------------------------
# ADL:FORMATS
# We map physical description form. Note that the following mapping
# from the MARC Value List for Form omits the 'electronic' term,
# since that term by itself does not hint at the most fundamental
# distinction we need to make: whether the item is online or offline.
# Lookup table from MARC form terms to ADL Object Formats terms.  Keys
# are lowercase because mapTerm lowercases the source value before the
# lookup.
marcformMap = {
    "braille" : "Paper",
    "microfiche" : "Film",
    "microfilm" : "Film",
    "print" : "Paper",
    "large print" : "Paper" }
map("adl:formats",
    # Only <form> elements whose authority is 'marcform' are considered.
    "/M:mods/M:physicalDescription/M:form[@authority='marcform']",
    field("physicalDescription/form"),
    # mapTerm returns None for values that have no entry in marcformMap.
    prefilters=lambda v: mapTerm("ADL Object Formats", marcformMap, v),
    id=7)
# We also map MIME types, but only if a download URL is present, from
# which we infer that the item is online.
# Lookup table from Internet media (MIME) types to ADL Object Formats
# terms.  Keys are lowercase because mapTerm lowercases the source
# value before the lookup.
mimeMap = {
    "image/tiff" : "TIFF" }
# Both statements below apply only when the record has a location URL.
if present("/M:mods/M:location/M:url"):
    # Constant format term recording that the item is online.
    mapConstant("adl:formats", ("ADL Object Formats", "Online"), id=8)
    map("adl:formats",
        "/M:mods/M:physicalDescription/M:internetMediaType",
        field("physicalDescription/internetMediaType"),
        # mapTerm returns None for media types that have no entry in mimeMap.
        prefilters=lambda v: mapTerm("ADL Object Formats", mimeMap, v),
        id=9)
# ----------------------------------------
# ADL:SUBJECT-RELATED-TEXT
# We map abstracts, notes, topics, and geographic names.
# Abstracts.
map("adl:subject-related-text",
    "/M:mods/M:abstract",
    field("abstract"),
    id=10)
# Notes.
map("adl:subject-related-text",
    "/M:mods/M:note",
    field("note"),
    id=11)
# Subject topics.
map("adl:subject-related-text",
    "/M:mods/M:subject/M:topic",
    field("subject/topic"),
    id=12)
# Subject geographic names.
map("adl:subject-related-text",
    "/M:mods/M:subject/M:geographic",
    field("subject/geographic"),
    id=13)
# Hierarchical geographic names are concatenated and separated by
# commas.
map("adl:subject-related-text",
    # The second expression joins the children of each selected
    # element with ", ".
    ["/M:mods/M:subject/M:hierarchicalGeographic", "join(*, ', ')"],
    field("subject/hierarchicalGeographic"),
    id=14)
# ----------------------------------------
# ADL:ORIGINATORS
# We map all names, regardless of role. In mapping a name, all name
# parts and affiliations are concatenated into a single value; display
# forms, descriptions, and roles are omitted.
map("adl:originators",
    # For each <name>, join only its <namePart> and <affiliation>
    # children with single spaces; other children are left out.
    ["/M:mods/M:name",
     "join(*[self::M:namePart or self::M:affiliation], ' ')"],
    field("name"),
    id=15)
# And we map all publishers.
map("adl:originators",
    "/M:mods/M:originInfo/M:publisher",
    field("originInfo/publisher"),
    id=16)
# ----------------------------------------
# ADL:IDENTIFIERS
# We map all qualified, non-local, valid identifiers.
map("adl:identifiers",
    # Select identifiers that have a 'type' attribute other than
    # 'local' and no 'invalid' attribute.  Each value is built from "."
    # and ".@type" (presumably the identifier text and its type).
    ["/M:mods/M:identifier[@type and @type != 'local' and not(@invalid)]",
     ".", ".@type"],
    field("identifier"),
    id=17)
# ----------------------------------------
# REQUIREMENTS & EXPECTATIONS
# Declare a cardinality requirement on the adl:titles bucket.
# NOTE(review): "1+" presumably means "at least one value"; what
# happens when the requirement is not met is decided by ADL_mapper --
# confirm.
requirement("adl:titles", "1+")
# NOTE(review): presumably emits the mapped result and is the
# counterpart of the input() call near the top of the file -- confirm
# against ADL_mapper.
output()