<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><meta http-equiv=Content-Type content="text/html; charset=iso-8859-1"><meta name=Generator content="Microsoft Word 14 (filtered medium)"><style><!--
/* Font Definitions */
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:Tahoma;
        panose-1:2 11 6 4 3 5 4 4 2 4;}
@font-face
        {font-family:Consolas;
        panose-1:2 11 6 9 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        margin-bottom:.0001pt;
        font-size:11.0pt;
        font-family:"Calibri","sans-serif";
        mso-fareast-language:EN-US;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
        {mso-style-priority:99;
        color:purple;
        text-decoration:underline;}
p.MsoPlainText, li.MsoPlainText, div.MsoPlainText
        {mso-style-priority:99;
        mso-style-link:"Plain Text Char";
        margin:0in;
        margin-bottom:.0001pt;
        font-size:11.0pt;
        font-family:"Calibri","sans-serif";
        mso-fareast-language:EN-US;}
p.MsoAcetate, li.MsoAcetate, div.MsoAcetate
        {mso-style-priority:99;
        mso-style-link:"Balloon Text Char";
        margin:0in;
        margin-bottom:.0001pt;
        font-size:8.0pt;
        font-family:"Tahoma","sans-serif";
        mso-fareast-language:EN-US;}
p.MsoListParagraph, li.MsoListParagraph, div.MsoListParagraph
        {mso-style-priority:34;
        margin-top:0in;
        margin-right:0in;
        margin-bottom:0in;
        margin-left:.5in;
        margin-bottom:.0001pt;
        font-size:11.0pt;
        font-family:"Calibri","sans-serif";
        mso-fareast-language:EN-US;}
span.PlainTextChar
        {mso-style-name:"Plain Text Char";
        mso-style-priority:99;
        mso-style-link:"Plain Text";
        font-family:"Calibri","sans-serif";}
span.BalloonTextChar
        {mso-style-name:"Balloon Text Char";
        mso-style-priority:99;
        mso-style-link:"Balloon Text";
        font-family:"Tahoma","sans-serif";}
span.EmailStyle22
        {mso-style-type:personal-reply;
        font-family:"Calibri","sans-serif";
        color:#1F497D;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page WordSection1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
        {page:WordSection1;}
/* List Definitions */
@list l0
        {mso-list-id:355155171;
        mso-list-type:hybrid;
        mso-list-template-ids:369130640 134807567 134807577 134807579 134807567 134807577 134807579 134807567 134807577 134807579;}
@list l0:level1
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l0:level2
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l0:level3
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l0:level4
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l0:level5
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l0:level6
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l0:level7
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l0:level8
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l0:level9
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l1
        {mso-list-id:751856881;
        mso-list-type:hybrid;
        mso-list-template-ids:1202992782 134807567 134807577 134807579 134807567 134807577 134807579 134807567 134807577 134807579;}
@list l1:level1
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l1:level2
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l1:level3
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l1:level4
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l1:level5
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l1:level6
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l1:level7
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l1:level8
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l1:level9
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l2
        {mso-list-id:1441607461;
        mso-list-type:hybrid;
        mso-list-template-ids:-1215405610 134807567 134807577 134807579 134807567 134807577 134807579 134807567 134807577 134807579;}
@list l2:level1
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l2:level2
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l2:level3
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l2:level4
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l2:level5
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l2:level6
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l2:level7
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l2:level8
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l2:level9
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l3
        {mso-list-id:1898011062;
        mso-list-type:hybrid;
        mso-list-template-ids:-314165980 134807567 134807577 134807579 134807567 134807577 134807579 134807567 134807577 134807579;}
@list l3:level1
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l3:level2
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l3:level3
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l3:level4
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l3:level5
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l3:level6
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l3:level7
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l3:level8
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;}
@list l3:level9
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
ol
        {margin-bottom:0in;}
ul
        {margin-bottom:0in;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]--></head><body lang=EN-GB link=blue vlink=purple><div class=WordSection1><p class=MsoNormal><span style='color:#1F497D'>It’s been a couple of weeks but I said I’d try to write something about a more general concern I have around the way we use basisOfRecord and dcterms:type to hold values like occurrence, event and materialSample. This is something that has concerned me for years and that, I worry, is making everything we all do much messier than it need be.<o:p></o:p></span></p><p class=MsoNormal><span style='color:#1F497D'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#1F497D'>I believe that the way we have come to use Darwin Core basisOfRecord is confused and unhelpful. I really wish we used Darwin Core like this:<o:p></o:p></span></p><p class=MsoNormal><span style='color:#1F497D'><o:p> </o:p></span></p><p class=MsoListParagraph style='text-indent:-.25in;mso-list:l2 level1 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>1.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>basisOfRecord should be used ONLY to indicate the type of evidence that lies behind a record – a key aspect of whether the record is likely to be useful for different purposes<o:p></o:p></span></p><p class=MsoListParagraph style='text-indent:-.25in;mso-list:l2 level1 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>2.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>basisOfRecord values should be taken from a hierarchical vocabulary with three main branches:<o:p></o:p></span></p><p class=MsoListParagraph style='margin-left:1.0in;text-indent:-.25in;mso-list:l2 level2 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>a.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>“specimens” (i.e. 
biological material that can be reviewed), with a hierarchy of subordinate values such as “pinnedSpecimen”, “herbariumSheet”, etc.<o:p></o:p></span></p><p class=MsoListParagraph style='margin-left:1.0in;text-indent:-.25in;mso-list:l2 level2 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>b.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>derived, non-biological evidence (not sure what name), with a hierarchy of subordinate values such as “dnaSequence”, “soundRecording”, “stillImage”, etc.<o:p></o:p></span></p><p class=MsoListParagraph style='margin-left:1.0in;text-indent:-.25in;mso-list:l2 level2 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>c.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>asserted observations with no revisitable evidence other than the authority of the observer<o:p></o:p></span></p><p class=MsoListParagraph style='text-indent:-.25in;mso-list:l2 level1 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>3.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>TDWG should deliver a basic ontology in the form of a graph of key relationships between the most significant conceptual entities in our world (TaxonName, TaxonConcept, Identification, Collection, Specimen, Locality, Agent, …)<o:p></o:p></span></p><p class=MsoListParagraph style='text-indent:-.25in;mso-list:l2 level1 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>4.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>This ontology should not attempt to map all the complexity of biodiversity-related data – just provide the high-level map and key relationships (TaxonConcept hasName TaxonName, Specimen heldIn Collection, etc.) 
– it should leave definition of other properties as a separate, open-ended activity for the community<o:p></o:p></span></p><p class=MsoListParagraph style='text-indent:-.25in;mso-list:l2 level1 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>5.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>This ontology should be reviewed at regular intervals and versioned as necessary to address critical gaps – provided that backwards compatibility is maintained (splitting a class into multiple constituent classes probably won’t break anything, so start simple)<o:p></o:p></span></p><p class=MsoListParagraph style='text-indent:-.25in;mso-list:l2 level1 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>6.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>The Darwin Core vocabulary should be published as a flat, open-ended list of terms with clear definitions that can be freely combined as columns in denormalised records<o:p></o:p></span></p><p class=MsoListParagraph style='text-indent:-.25in;mso-list:l2 level1 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>7.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>Every Darwin Core term should be documented to be tightly associated with a single, fixed class in the ontology (e.g. 
scientificName and specificEpithet are ALWAYS considered to be properties of a TaxonName whether or not that TaxonName object is clearly referenced or separated out)<o:p></o:p></span></p><p class=MsoListParagraph style='text-indent:-.25in;mso-list:l2 level1 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>8.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>Every data publisher should be encouraged to share all relevant data elements in their source data in the most convenient normalised or denormalised form, provided they use the recognised Darwin Core properties for elements that match the definition for those terms, and provided they give some metadata for other elements. Possible forms include:<o:p></o:p></span></p><p class=MsoListParagraph style='margin-left:1.0in;text-indent:-.25in;mso-list:l2 level2 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>a.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>A completely hierarchical, ABCD-like, XML representation<o:p></o:p></span></p><p class=MsoListParagraph style='margin-left:1.0in;text-indent:-.25in;mso-list:l2 level2 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>b.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>A completely flat denormalised, simple-DwC-like, CSV representation, if the data includes no elements with higher cardinality<o:p></o:p></span></p><p class=MsoListParagraph style='margin-left:1.0in;text-indent:-.25in;mso-list:l2 level2 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>c.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>A set of flat, relational, CSV representations, as with Darwin Core Archive star schemas, but with freedom to have more 
complex graphed relationships as needed<o:p></o:p></span></p><p class=MsoListParagraph style='text-indent:-.25in;mso-list:l2 level1 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>9.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>Each table of CSV data in 8b and 8c is a view that corresponds to a linear subgraph of the TDWG ontology, identified by the classes of the DwC properties used – this allows us to infer the “shape” of the data in terms of the ontology<o:p></o:p></span></p><p class=MsoListParagraph style='text-indent:-.25in;mso-list:l2 level1 lfo7'><![if !supportLists]><span style='color:#1F497D'><span style='mso-list:Ignore'>10.<span style='font:7.0pt "Times New Roman"'> </span></span></span><![endif]><span style='color:#1F497D'>If we do this, we do not need to worry about whether a record is a checklist record, an event, an occurrence, a material sample or whatever else, although we could use the dcterms:type property, or some new property, to hold this detail as a further clue to intent and possible use for the record<o:p></o:p></span></p><p class=MsoNormal><span style='color:#1F497D'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#1F497D'>Here is an example. In today’s terms, what sort of DwC record is this? Do I really have to replace “recordId” with “eventId”, “occurrenceId” or similar? 
And which should I choose?<o:p></o:p></span></p><p class=MsoNormal><span style='color:#1F497D'><o:p> </o:p></span></p><p class=MsoNormal><b><span style='color:#1F497D'>recordId, decimalLatitude, decimalLongitude, coordinatePrecision, eventDate, scientificName, individualCount<o:p></o:p></span></b></p><p class=MsoNormal><span style='color:#1F497D'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#1F497D'>I think it is clear that this record tells us that there was a recording event at a particular time and place where someone or some process recorded a given number of individual organisms which were identified as representatives of a taxon concept with a name corresponding to the supplied scientific name. In other words this gives us some properties from a subgraph that might include, say, instances of TDWG Event, Locality, Date, Occurrence, Identification, TaxonConcept and TaxonName classes. None of these is specifically referenced but we can unambiguously fold the flat record onto the ontology. We can moreover then use the combination of supplied elements to decide whether this record would be of interest to GBIF, a national information facility, a tool cataloguing uses of scientific names, etc. The same will also apply if multiple CSV tables are provided as in 8c. <o:p></o:p></span></p><p class=MsoNormal><span style='color:#1F497D'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#1F497D'>I have thought about this for a long time and cannot yet think of an area in which this would not work efficiently – and unambiguously – for all concerned. There are some cases where multiple instances of the same ontology class would be referenced within a single record, which may mean more care is needed by the publisher (e.g. if an insect specimen record includes a reference to a host plant). There may be cases where automated review of the data indicates that there are impossible combinations or ambiguities that the publisher must resolve. 
However I believe we could use this approach to generalise all mobilisation and consumption of biodiversity data (including all the things we have addressed under ABCD, SDD, TCS, Plinian Core, etc.) and to make it genuinely possible for any data holder to share all the data they have in a form that makes sense to them, while allowing others to consume these data intelligently.<o:p></o:p></span></p><p class=MsoNormal><span style='color:#1F497D'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#1F497D'>Right now, I think our confused use of basisOfRecord is almost the only thing that stops us from exploring this. We have blurred the question of the evidence for a record, with the question of the “shape” of the record as a subgraph. These are different things. Separating them will allow us to get away from some of our unresolvable debates and open up the doors to much simpler data sharing and reuse.<o:p></o:p></span></p><p class=MsoNormal><span style='color:#1F497D'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#1F497D'>Thanks, <o:p></o:p></span></p><p class=MsoNormal><span style='color:#1F497D'><o:p> </o:p></span></p><p class=MsoNormal><span style='color:#1F497D'>Donald <o:p></o:p></span></p><p class=MsoNormal><span style='color:#1F497D'><o:p> </o:p></span></p><div><p class=MsoNormal><span lang=EN-US style='font-size:10.5pt;font-family:Consolas;color:#1F497D;mso-fareast-language:EN-GB'>----------------------------------------------------------------------<o:p></o:p></span></p><p class=MsoNormal><span lang=EN-US style='font-size:10.5pt;font-family:Consolas;color:#1F497D;mso-fareast-language:EN-GB'>Donald Hobern - GBIF Director - </span><a href="mailto:dhobern@gbif.org"><span lang=EN-US style='font-size:10.5pt;font-family:Consolas;color:blue;mso-fareast-language:EN-GB'>dhobern@gbif.org</span></a><span style='font-size:10.5pt;font-family:Consolas;color:#1F497D;mso-fareast-language:EN-GB'> <span lang=EN-US><o:p></o:p></span></span></p><p 
class=MsoNormal><span lang=EN-US style='font-size:10.5pt;font-family:Consolas;color:#1F497D;mso-fareast-language:EN-GB'>Global Biodiversity Information Facility </span><a href="http://www.gbif.org/"><span lang=EN-US style='font-size:10.5pt;font-family:Consolas;color:blue;mso-fareast-language:EN-GB'>http://www.gbif.org/</span></a><span style='font-size:10.5pt;font-family:Consolas;color:#1F497D;mso-fareast-language:EN-GB'> <span lang=EN-US><o:p></o:p></span></span></p><p class=MsoNormal><span lang=DA style='font-size:10.5pt;font-family:Consolas;color:#1F497D;mso-fareast-language:EN-GB'>GBIF Secretariat, Universitetsparken 15, DK-2100 Copenhagen &Oslash;, Denmark<o:p></o:p></span></p><p class=MsoNormal><span lang=EN-US style='font-size:10.5pt;font-family:Consolas;color:#1F497D;mso-fareast-language:EN-GB'>Tel: +45 3532 1471 Mob: +45 2875 1471 Fax: +45 2875 1480<o:p></o:p></span></p><p class=MsoNormal><span lang=EN-US style='font-size:10.5pt;font-family:Consolas;color:#1F497D;mso-fareast-language:EN-GB'>----------------------------------------------------------------------<o:p></o:p></span></p></div></div></body></html>