############################################################################### ## ## ## ALEXANDRIA DIGITAL LIBRARY ## ## University of California at Santa Barbara ## ## ## ## ------------------------------------------------------------------------- ## ## ## ## Copyright (c) 2005 by the Regents of the University of California ## ## All rights reserved ## ## ## ## Redistribution and use in source and binary forms, with or without ## ## modification, are permitted provided that the following conditions are ## ## met: ## ## ## ## 1. Redistributions of source code must retain the above copyright ## ## notice, this list of conditions, and the following disclaimer. ## ## ## ## 2. Redistributions in binary form must reproduce the above copyright ## ## notice, this list of conditions, and the following disclaimer in ## ## the documentation and/or other materials provided with the ## ## distribution. ## ## ## ## 3. All advertising materials mentioning features or use of this ## ## software must display the following acknowledgement: This product ## ## includes software developed by the Alexandria Digital Library, ## ## University of California at Santa Barbara, and its contributors. ## ## ## ## 4. Neither the name of the University nor the names of its ## ## contributors may be used to endorse or promote products derived ## ## from this software without specific prior written permission. ## ## ## ## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY ## ## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ## ## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE ## ## DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ## ## ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ## ## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ## ## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ## ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ## ## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ## ## ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ## ## POSSIBILITY OF SUCH DAMAGE. ## ## ## ############################################################################### # $Header: /export/home/gjanee/mm/RCS/MARC_mapping.py,v 1.2 2005/06/08 17:11:32 gjanee Exp $ # DESCRIPTION # # This is a mapping from the MARC 21 Concise Format for # Bibliographic Data (specifically, the MARCXML encoding thereof) # to the ADL bucket, browse, and access metadata views. # # References: # MARC Bibliographic: # http://www.loc.gov/marc/bibliographic/ecbdhome.html # MARCXML: # http://www.loc.gov/standards/marcxml/ # # For the most part, this mapping takes the standpoint that the # basic semantic unit in a MARC record is the field, with # subfields being a highly variable, relatively unreliable kind of # intra-field markup. This interpretation is borne out by the # examples given in the MARC specification and by the MARC records # on which this mapping was initially exercised. Thus, for # example, we map entire 100 and 110 fields to the # 'adl:originators' bucket as opposed to parsing and mapping # specific subfields therein. # # Limitations: given MARC's all-inclusive nature and wide range of # applications, this mapping is necessarily far from complete. # Many potentially useful mappings are not present. Also, this # mapping does not try to ferret out the many fields and subfields # in which any given piece of information (e.g., a geographic # placename) can possibly appear. 
#   Instead, we map only the most
#   directly relevant, well-populated fields.
#
# AUTHOR
#
#   Greg Janee
#   gjanee@alexandria.ucsb.edu
#
# HISTORY
#
#   $Log: MARC_mapping.py,v $
#   Revision 1.2 2005/06/08 17:11:32 gjanee
#   Added support for XML namespaces.
#
#   Revision 1.1 2005/03/18 21:20:06 gjanee
#   Initial revision
#

import re

# NOTE(review): input, namespace, map, consolidateTextualValues,
# requirement, and output are not defined in this file; presumably they
# all come from the ADL_mapper star import (shadowing the 'input' and
# 'map' builtins).  The statement order below is kept exactly as it was.
from ADL_mapper import *
input()
import ADL_buckets

# Prefix prepended to a MARC tag number to form the second element of
# each mapping's (label, identifier) pair.
PREFIX = "tag:loc.gov,2000:MARC21-B:"

# Bind the "M" prefix used by every XPath expression below to the
# MARCXML namespace.
namespace("M", "http://www.loc.gov/MARC21/slim")

# ----------------------------------------
# XPATH ABBREVIATIONS

def tag(t):
    """Return the XPath of all datafields whose tag attribute is `t`."""
    return "/M:record/M:datafield[@tag='" + t + "']"

def entireTag(t):
    """Return the XPath of every subfield of the datafields tagged `t`."""
    return tag(t) + "/M:subfield"

def code(c):
    """Return a relative XPath selecting the subfields with code `c`."""
    return "M:subfield[@code='" + c + "']"

def tagCode(t, c):
    """Return the XPath of subfield `c` within the datafields tagged `t`."""
    return tag(t) + "/" + code(c)

def _indicatorClause(attribute, values):
    """Build the predicate fragment restricting one indicator attribute.

    `attribute` is the attribute name ("ind1" or "ind2") and `values`
    is a sequence of acceptable one-character indicator values
    (typically a string such as "01").  The fragment has the form
    " and @ind1='0'" for a single value, or
    " and (@ind1='0' or @ind1='1')" for several.

    None means "no constraint" and yields "".  An empty sequence is
    treated the same way; emitting a bare " and " for it would produce
    a malformed XPath predicate.
    """
    if not values:
        return ""
    tests = ["@" + attribute + "='" + value + "'" for value in values]
    clause = " or ".join(tests)
    if len(tests) > 1:
        # Parenthesize so the "or" chain binds tighter than the
        # surrounding "and".
        clause = "(" + clause + ")"
    return " and " + clause

def qualifiedTag(t, ind1=None, ind2=None):
    """Return the XPath of the datafields tagged `t`, optionally
    restricted by indicator values.

    `ind1` and `ind2` are each either None (no restriction) or a
    sequence of acceptable values for the corresponding indicator; a
    datafield qualifies if its indicator equals any one of them.
    """
    # tag(t) ends in "]"; strip it so the indicator tests can be
    # appended inside the same predicate, then close the predicate.
    return (tag(t)[:-1] + _indicatorClause("ind1", ind1) +
            _indicatorClause("ind2", ind2) + "]")

def qualifiedTagCode(t, c, ind1=None, ind2=None):
    """Return the XPath of subfield `c` within the datafields selected
    by qualifiedTag(t, ind1, ind2)."""
    return qualifiedTag(t, ind1, ind2) + "/" + code(c)

def entireQualifiedTag(t, ind1=None, ind2=None):
    """Return the XPath of every subfield of the datafields selected by
    qualifiedTag(t, ind1, ind2)."""
    return qualifiedTag(t, ind1, ind2) + "/M:subfield"

# ----------------------------------------
# ADL:TITLES

map("adl:titles", entireTag("245"),
    ("[MARC] Title Statement", PREFIX+"245"))

consolidateTextualValues("adl:titles", " ")

# ----------------------------------------
# ADL:GEOGRAPHIC-LOCATIONS

# One hemisphere letter followed by exactly three degree digits, two
# minute digits, and two second digits (hdddmmss).
_dmsParser = re.compile(r"([NSEW])(\d\d\d)(\d\d)(\d\d)$")

def cvtDmsCoordinate(coord, dirs, limit):
    """Convert one hdddmmss coordinate to a decimal-degree string.

    `coord` is the raw coordinate, `dirs` is the two-letter string of
    hemispheres acceptable for this axis ("NS" or "EW"), and `limit`
    is the largest permissible number of whole degrees (90 or 180).
    The second letter of `dirs` denotes the negative hemisphere.

    Returns str() of the signed decimal value, or None if `coord` is
    malformed, names a hemisphere not in `dirs`, or is out of range.
    """
    match = _dmsParser.match(coord)
    if not match:
        return None
    hemisphere = match.group(1)
    d = int(match.group(2))
    m = int(match.group(3))
    s = int(match.group(4))
    if hemisphere not in dirs or m >= 60 or s >= 60:
        return None
    # Exactly `limit` degrees is valid only with zero minutes and
    # seconds; anything beyond that is out of range.
    if d > limit or (d == limit and (m != 0 or s != 0)):
        return None
    v = d + m/60.0 + s/3600.0
    if hemisphere == dirs[1]:
        v = -v
    return str(v)

def cvtDmsCoordinates(v):
    """Convert a 4-tuple of hdddmmss coordinates to decimal degrees.

    The first two elements are validated as latitudes and the last two
    as longitudes.  Returns the converted 4-tuple, or None if any
    element is missing or fails conversion.
    """
    if None in v:
        return None
    v = (cvtDmsCoordinate(v[0], "NS", 90),
         cvtDmsCoordinate(v[1], "NS", 90),
         cvtDmsCoordinate(v[2], "EW", 180),
         cvtDmsCoordinate(v[3], "EW", 180))
    if None in v:
        return None
    return v

# Subfields are listed in the order f, g, e, d so that the converter
# sees the two latitudes first and the two longitudes last.
map("adl:geographic-locations",
    [tag("034"), code("f"), code("g"), code("e"), code("d")],
    ("[MARC] Coded Cartographic Mathematical Data", PREFIX+"034"),
    converters=cvtDmsCoordinates)

# ----------------------------------------
# ADL:DATES

def fltRemoveBcDate(v):
    """Prefilter: drop a single date whose value starts with "c" (the
    B.C. era marker); pass any other value through unchanged."""
    if v[0].startswith("c"):
        return None
    return v

# "d" era marker, four year digits, then optional two-digit month,
# day, and hour components.
_ymdhParser = re.compile(r"d(\d\d\d\d)(\d\d)?(\d\d)?(\d\d)?$")

def cvtYmdhDate(v):
    """Convert a 1-tuple holding a "dyyyy[mm[dd[hh]]]" date to a
    1-tuple holding "yyyy", "yyyy-mm", or "yyyy-mm-dd".

    Any hour component is validated by the pattern but discarded.
    Returns None if the value does not match the pattern.
    """
    match = _ymdhParser.match(v[0])
    if not match:
        return None
    d = match.group(1)
    if match.group(2) is not None:
        d += "-" + match.group(2)
    if match.group(3) is not None:
        d += "-" + match.group(3)
    return (d,)

map("adl:dates", qualifiedTagCode("045", "b", ind1="01"),
    ("[MARC] Time Period of Content", PREFIX+"045"),
    prefilters=fltRemoveBcDate, converters=cvtYmdhDate, id=1)

def fltRemoveBcDates(v):
    """Prefilter for a (begin, end) date pair.

    The pair is dropped (None is returned) unless both values are
    present and the end date carries the "d" era marker.  A begin date
    carrying the "c" (B.C.) marker is clamped to "d0000".
    """
    if v[0] is None or v[1] is None or not v[1].startswith("d"):
        return None
    if v[0].startswith("c"):
        begin = "d0000"
    else:
        begin = v[0]
    return (begin, v[1])

def cvtYmdhDates(v):
    """Convert a (begin, end) pair of "dyyyy[mm[dd[hh]]]" dates with
    cvtYmdhDate; return None if either date fails to convert."""
    begin = cvtYmdhDate((v[0],))
    end = cvtYmdhDate((v[1],))
    if begin is not None and end is not None:
        return (begin[0], end[0])
    return None

# For 045 fields with first indicator 2, the first and second "b"
# subfields are taken as the begin and end of a date range.
map("adl:dates",
    [qualifiedTag("045", ind1="2"), code("b")+"[1]", code("b")+"[2]"],
    ("[MARC] Time Period of Content", PREFIX+"045"),
    prefilters=fltRemoveBcDates, converters=cvtYmdhDates, id=2)

# ----------------------------------------
# ADL:ASSIGNED-TERMS

# Subject entries whose second indicator is one of 0, 1, 2, 3, 5, 6 go
# to this bucket; those with a blank or 4 go to subject-related-text
# below.
map("adl:assigned-terms", entireQualifiedTag("650", ind2="012356"),
    ("[MARC] Subject Added Entry--Topical Term", PREFIX+"650"))

map("adl:assigned-terms", entireQualifiedTag("651", ind2="012356"),
    ("[MARC] Subject Added Entry--Geographic Name", PREFIX+"651"))

consolidateTextualValues("adl:assigned-terms")

# ----------------------------------------
# ADL:SUBJECT-RELATED-TEXT

map("adl:subject-related-text", entireQualifiedTag("650", ind2=" 4"),
    ("[MARC] Subject Added Entry--Topical Term", PREFIX+"650"))

map("adl:subject-related-text", entireQualifiedTag("651", ind2=" 4"),
    ("[MARC] Subject Added Entry--Geographic Name", PREFIX+"651"))

consolidateTextualValues("adl:subject-related-text")

# ----------------------------------------
# ADL:ORIGINATORS

map("adl:originators", entireTag("100"),
    ("[MARC] Main Entry--Personal Name", PREFIX+"100"))

map("adl:originators", entireTag("110"),
    ("[MARC] Main Entry--Corporate Name", PREFIX+"110"))

consolidateTextualValues("adl:originators", " ")

# ----------------------------------------
# ADL:IDENTIFIERS

map("adl:identifiers", [tagCode("074", "a"), "=USGPO Item Number"],
    ("[MARC] GPO Item Number", PREFIX+"074"))

# ----------------------------------------
# REQUIREMENTS & EXPECTATIONS

requirement("adl:titles", "1+")

output()