diff --git a/playground.fsx b/playground.fsx index 7e82c69..13793a4 100644 --- a/playground.fsx +++ b/playground.fsx @@ -7,12 +7,201 @@ #r "OBO.NET.CodeGeneration.dll" open OBO.NET +open OBO.NET.DBXref open OBO.NET.CodeGeneration #r "nuget: FSharpAux" #r "nuget: ARCTokenization" open FSharpAux +open FSharpAux.Regex +open ARCTokenization.Terms + +open System + +open type System.Environment + + + +
+/// Ontology containing OBO terms, OBO type defs and the values of the document header tags.
+type OboOntology =
+
+    {
+        Terms : OboTerm list // `[Term]` stanzas
+        TypeDefs : OboTypeDef list // `[Typedef]` stanzas
+        FormatVersion : string // `format-version` header tag
+        DataVersion : string option // `data-version` (or `version`) header tag
+        Ontology : string option // `ontology` header tag
+        Date : DateTime option // `date` header tag
+        SavedBy : string option // `saved-by` header tag
+        AutoGeneratedBy : string option // `auto-generated-by` header tag
+        Subsetdefs : string list // `subsetdef` header tags
+        Imports : string list // `import` header tags; TODO: needs its own type (Record?)
+        Synonymtypedefs : string list // `synonymtypedef` header tags; TODO: rethink type, maybe create a parent type (Union? A Record might be better)
+        Idspaces : string list // `idspace` header tags; TODO: rethink as its own Record type
+        DefaultRelationshipIdPrefix : string option // `default-relationship-id-prefix` header tag
+        IdMappings : string list // `id-mapping` header tags; TODO: rethink: maybe a new record? or TermRelation?
+        Remarks : string list // `remark` header tags
+        TreatXrefsAsEquivalents : string list // `treat-xrefs-as-equivalent` header tags
+        TreatXrefsAsGenusDifferentias : string list // `treat-xrefs-as-genus-differentia` header tags; TODO: rethink: maybe a new record? or plain string option?
+        TreatXrefsAsRelationships : string list // `treat-xrefs-as-relationship` header tags; TODO: maybe better as its own (Record/Union?) type
+        TreatXrefsAsIsAs : string list // `treat-xrefs-as-is_a` header tags
+        RelaxUniqueIdentifierAssumptionForNamespaces : string list // `relax-unique-identifier-assumption-for-namespace` header tags
+        RelaxUniqueLabelAssumptionForNamespaces : string list // `relax-unique-label-assumption-for-namespace` header tags
+    }
+
+    /// Creates an OboOntology based on the given parameters.
+    /// Every value is passed positionally (curried), in the order in which the record fields are declared.
+    static member create terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idspaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces : OboOntology =
+        {
+            // Stanzas.
+            Terms = terms; TypeDefs = typedefs
+            // Header tags holding a single value.
+            FormatVersion = formatVersion; DataVersion = dataVersion; Ontology = ontology; Date = date
+            SavedBy = savedBy; AutoGeneratedBy = autoGeneratedBy
+            DefaultRelationshipIdPrefix = defaultRelationshipIdPrefix
+            // Header tags holding a list of values.
+            Subsetdefs = subsetdefs; Imports = imports; Synonymtypedefs = synonymtypedefs; Idspaces = idspaces
+            IdMappings = idMappings; Remarks = remarks
+            TreatXrefsAsEquivalents = treatXrefsAsEquivalents
+            TreatXrefsAsGenusDifferentias = treatXrefsAsGenusDifferentias
+            TreatXrefsAsRelationships = treatXrefsAsRelationships
+            TreatXrefsAsIsAs = treatXrefsAsIsAs
+            RelaxUniqueIdentifierAssumptionForNamespaces = relaxUniqueIdentifierAssumptionForNamespaces
+            RelaxUniqueLabelAssumptionForNamespaces = relaxUniqueLabelAssumptionForNamespaces
+        }
+
+    /// Creates an OboOntology based on the given arguments.
+    /// Only terms, typedefs and formatVersion are mandatory: omitted single-valued header tags become None, omitted list-valued header tags become the empty list.
+    static member Create(terms, typedefs, formatVersion, ?DataVersion, ?Ontology, ?Date, ?SavedBy, ?AutoGeneratedBy, ?Subsetdefs, ?Imports, ?Synonymtypedefs, ?Idspaces, ?DefaultRelationshipIdPrefix, ?IdMappings, ?Remarks, ?TreatXrefsAsEquivalents, ?TreatXrefsAsGenusDifferentias, ?TreatXrefsAsRelationships, ?TreatXrefsAsIsAs, ?RelaxUniqueIdentifierAssumptionForNamespaces, ?RelaxUniqueLabelAssumptionForNamespaces) =
+        // A list-valued argument that was not supplied falls back to the empty list.
+        let orEmpty optionalList = Option.defaultValue [] optionalList
+        {
+            Terms = terms
+            TypeDefs = typedefs
+            FormatVersion = formatVersion
+            // Optional single values are stored exactly as received (Some value / None).
+            DataVersion = DataVersion
+            Ontology = Ontology
+            Date = Date
+            SavedBy = SavedBy
+            AutoGeneratedBy = AutoGeneratedBy
+            DefaultRelationshipIdPrefix = DefaultRelationshipIdPrefix
+            Subsetdefs = orEmpty Subsetdefs
+            Imports = orEmpty Imports
+            Synonymtypedefs = orEmpty Synonymtypedefs
+            Idspaces = orEmpty Idspaces
+            IdMappings = orEmpty IdMappings
+            Remarks = orEmpty Remarks
+            TreatXrefsAsEquivalents = orEmpty TreatXrefsAsEquivalents
+            TreatXrefsAsGenusDifferentias = orEmpty TreatXrefsAsGenusDifferentias
+            TreatXrefsAsRelationships = orEmpty TreatXrefsAsRelationships
+            TreatXrefsAsIsAs = orEmpty TreatXrefsAsIsAs
+            RelaxUniqueIdentifierAssumptionForNamespaces = orEmpty RelaxUniqueIdentifierAssumptionForNamespaces
+            RelaxUniqueLabelAssumptionForNamespaces = orEmpty RelaxUniqueLabelAssumptionForNamespaces
+        }
+
+//let res = (createRegex RegexOptions.None """format-version\s:\s*(?.+)""").Match "format-version : 17.5.1"
+//let res = (createRegex RegexOptions.None """format-version\s:\s*(?.+)""").Match "format-verson: 17.5.1"
+//res.Groups["formatVersion"].Value
+//DateTime.ParseExact("31:12:2000 23:59", "dd:MM:yyyy HH:mm", Globalization.CultureInfo.InvariantCulture)
+
+    /// Reads an OBO Ontology containing document header tags, and term and type def stanzas from lines.
+    /// `[Term]` and `[Typedef]` stanzas are delegated to OboTerm.fromLines and OboTypeDef.fromLines (both receive `verbose`).
+    /// Every other line is matched against the known document header tags; lines matching none of them are skipped.
+    /// A repeated single-valued tag prints a warning and the last value wins. All lists are returned in document order.
+    static member fromLines verbose (input : seq<string>) =
+
+        // Regex for a header line of the form `tag: value`; the value is captured in the group "value".
+        // The pattern is anchored to the start of the line so that a tag name occurring inside the value
+        // of another tag (e.g. `remark: see import: foo`) is not mistaken for that tag.
+        let tagRegex tagPattern valuePattern =
+            createRegex RegexOptions.None (@"^\s*(?:" + tagPattern + @")\s*:\s*(?<value>" + valuePattern + ")")
+
+        // Header tag whose value is handed to `store` without any further check.
+        let tag tagPattern (store : string -> OboOntology -> OboOntology) =
+            tagRegex tagPattern ".+", store
+
+        // Header tag that should occur only once: warns when `isSet` reports an already stored value, then stores the new one.
+        let uniqueTag tagPattern (tagName : string) (isSet : OboOntology -> bool) (store : string -> OboOntology -> OboOntology) =
+            let storeWithWarning value ontology =
+                if isSet ontology then printfn "WARNING: Duplicate %s in document header tags!" tagName
+                store value ontology
+            tagRegex tagPattern ".+", storeWithWarning
+
+        // Only values shaped like `dd:MM:yyyy HH:mm` reach this function; a value that still fails to parse is stored as None.
+        let storeDate (value : string) (ontology : OboOntology) =
+            if ontology.Date.IsSome then printfn "WARNING: Duplicate date in document header tags!"
+            let parsedDate =
+                try DateTime.ParseExact(value, "dd:MM:yyyy HH:mm", Globalization.CultureInfo.InvariantCulture) |> Some
+                with _ ->
+                    printfn "ERROR: Inproper date given!"
+                    None
+            { ontology with Date = parsedDate }
+
+        // Checked in this order; the first tag whose regex matches handles the line.
+        let headerTags =
+            [
+                uniqueTag "format-version" "format-version" (fun o -> o.FormatVersion <> "") (fun v o -> { o with FormatVersion = v })
+                uniqueTag "data-version|version" "data-version" (fun o -> o.DataVersion.IsSome) (fun v o -> { o with DataVersion = Some v })
+                tag "ontology" (fun v o -> { o with Ontology = Some v })
+                (tagRegex "date" @"\d{2}:\d{2}:\d{4} \d{2}:\d{2}", storeDate)
+                uniqueTag "saved-by" "saved-by" (fun o -> o.SavedBy.IsSome) (fun v o -> { o with SavedBy = Some v })
+                uniqueTag "auto-generated-by" "auto-generated-by" (fun o -> o.AutoGeneratedBy.IsSome) (fun v o -> { o with AutoGeneratedBy = Some v })
+                tag "subsetdef" (fun v o -> { o with Subsetdefs = v :: o.Subsetdefs })
+                tag "import" (fun v o -> { o with Imports = v :: o.Imports })
+                tag "synonymtypedef" (fun v o -> { o with Synonymtypedefs = v :: o.Synonymtypedefs })
+                tag "idspace" (fun v o -> { o with Idspaces = v :: o.Idspaces })
+                uniqueTag "default-relationship-id-prefix" "default-relationship-id-prefix" (fun o -> o.DefaultRelationshipIdPrefix.IsSome) (fun v o -> { o with DefaultRelationshipIdPrefix = Some v })
+                tag "id-mapping" (fun v o -> { o with IdMappings = v :: o.IdMappings })
+                tag "remark" (fun v o -> { o with Remarks = v :: o.Remarks })
+                tag "treat-xrefs-as-equivalent" (fun v o -> { o with TreatXrefsAsEquivalents = v :: o.TreatXrefsAsEquivalents })
+                tag "treat-xrefs-as-genus-differentia" (fun v o -> { o with TreatXrefsAsGenusDifferentias = v :: o.TreatXrefsAsGenusDifferentias })
+                tag "treat-xrefs-as-relationship" (fun v o -> { o with TreatXrefsAsRelationships = v :: o.TreatXrefsAsRelationships })
+                tag "treat-xrefs-as-is_a" (fun v o -> { o with TreatXrefsAsIsAs = v :: o.TreatXrefsAsIsAs })
+                tag "relax-unique-identifier-assumption-for-namespace" (fun v o -> { o with RelaxUniqueIdentifierAssumptionForNamespaces = v :: o.RelaxUniqueIdentifierAssumptionForNamespaces })
+                tag "relax-unique-label-assumption-for-namespace" (fun v o -> { o with RelaxUniqueLabelAssumptionForNamespaces = v :: o.RelaxUniqueLabelAssumptionForNamespaces })
+            ]
+
+        // Starting point: no stanzas, empty format version, no header tag values.
+        let emptyOntology : OboOntology =
+            {
+                Terms = []; TypeDefs = []
+                FormatVersion = String.Empty; DataVersion = None; Ontology = None; Date = None
+                SavedBy = None; AutoGeneratedBy = None; DefaultRelationshipIdPrefix = None
+                Subsetdefs = []; Imports = []; Synonymtypedefs = []; Idspaces = []
+                IdMappings = []; Remarks = []
+                TreatXrefsAsEquivalents = []; TreatXrefsAsGenusDifferentias = []
+                TreatXrefsAsRelationships = []; TreatXrefsAsIsAs = []
+                RelaxUniqueIdentifierAssumptionForNamespaces = []
+                RelaxUniqueLabelAssumptionForNamespaces = []
+            }
+
+        // The ontology record itself is the accumulator: every field is updated by name, so values cannot
+        // end up in the wrong field the way they can with a long list of positional arguments.
+        let rec loop (en : System.Collections.Generic.IEnumerator<string>) (ontology : OboOntology) lineNumber =
+
+            match en.MoveNext() with
+            | true ->
+                match trimComment en.Current with
+                | "[Term]" ->
+                    // The stanza parser advances the shared enumerator and reports the line number it stopped at.
+                    let lineNumber,parsedTerm = OboTerm.fromLines verbose en lineNumber "" "" false [] "" "" [] [] [] [] [] [] [] [] false [] [] [] false "" ""
+                    loop en { ontology with Terms = parsedTerm :: ontology.Terms } lineNumber
+                | "[Typedef]" ->
+                    let lineNumber,parsedTypeDef = OboTypeDef.fromLines verbose en lineNumber "" "" "" "" [] [] false false false false false false false
+                    loop en { ontology with TypeDefs = parsedTypeDef :: ontology.TypeDefs } lineNumber
+                | line ->
+                    let updatedOntology =
+                        headerTags
+                        |> List.tryPick (fun (rx, store) ->
+                            let m = rx.Match line
+                            if m.Success then Some (store m.Groups.["value"].Value ontology) else None)
+                        |> Option.defaultValue ontology // not a known header tag: skip the line
+                    loop en updatedOntology (lineNumber + 1)
+            | false ->
+                // Everything was collected by prepending, so every list is reversed to restore document order.
+                { ontology with
+                    Terms = List.rev ontology.Terms
+                    TypeDefs = List.rev ontology.TypeDefs
+                    Subsetdefs = List.rev ontology.Subsetdefs
+                    Imports = List.rev ontology.Imports
+                    Synonymtypedefs = List.rev ontology.Synonymtypedefs
+                    Idspaces = List.rev ontology.Idspaces
+                    IdMappings = List.rev ontology.IdMappings
+                    Remarks = List.rev ontology.Remarks
+                    TreatXrefsAsEquivalents = List.rev ontology.TreatXrefsAsEquivalents
+                    TreatXrefsAsGenusDifferentias = List.rev ontology.TreatXrefsAsGenusDifferentias
+                    TreatXrefsAsRelationships = List.rev ontology.TreatXrefsAsRelationships
+                    TreatXrefsAsIsAs = List.rev ontology.TreatXrefsAsIsAs
+                    RelaxUniqueIdentifierAssumptionForNamespaces = List.rev ontology.RelaxUniqueIdentifierAssumptionForNamespaces
+                    RelaxUniqueLabelAssumptionForNamespaces = List.rev ontology.RelaxUniqueLabelAssumptionForNamespaces
+                }
+
+        // `use` disposes the enumerator once the whole input has been consumed.
+        use en = input.GetEnumerator()
+        loop en emptyOntology 0 // was 1 before
+
+let fl1 = OboOntology.fromLines false (IO.File.ReadAllLines(IO.Path.Combine(__SOURCE_DIRECTORY__, "tests", "OBO.NET.Tests", "References", "HeaderTags_correct.obo")))
+let fl2 = OboOntology.fromLines false (IO.File.ReadAllLines(IO.Path.Combine(__SOURCE_DIRECTORY__, "tests", "OBO.NET.Tests", "References", "HeaderTags_incorrect.obo")))
+let fl3 = OboOntology.fromLines false (IO.File.ReadAllLines(IO.Path.Combine(__SOURCE_DIRECTORY__, "tests", "OBO.NET.Tests", "References", "HeaderTags_duplicates.obo")))
+
open ARCTokenization.Terms open type System.Environment @@ -27,6 +216,7 @@ let actual = |> String.concat "\n" |> String.replace "\r" "" +OBO.NET.OboOntology.toFile @"C:\Repos\CSBiology\OBO.NET\tests\OBO.NET.CodeGeneration.Tests\References\ReferenceOboFile.obo" InvestigationMetadata.ontology // OBO.NET.OboOntology.toFile @"C:\Repos\CSBiology\OBO.NET\tests\OBO.NET.CodeGeneration.Tests\References\ReferenceOboFile.obo" InvestigationMetadata.ontology CodeGeneration.toFile "InvestigationMetadata" InvestigationMetadata.ontology @"C:\Repos\CSBiology\OBO.NET\tests\OBO.NET.CodeGeneration.Tests\References\ReferenceSourceFile2.fs" diff --git a/src/OBO.NET/OboEntries.fs b/src/OBO.NET/OboEntries.fs index eb1987a..eca3665 100644 --- a/src/OBO.NET/OboEntries.fs +++ b/src/OBO.NET/OboEntries.fs @@ -1,6 +1,9 @@ namespace OBO.NET +open System + + /// Functions for working with OboEntries. module OboEntries = @@ -8,7 +11,8 @@ module OboEntries = let fromLines verbose (input : seq) = let en = input.GetEnumerator() - let rec loop (en : System.Collections.Generic.IEnumerator) entries lineNumber = + + let rec loop (en : Collections.Generic.IEnumerator) entries lineNumber = match en.MoveNext() with | true -> @@ -26,5 +30,5 @@ module OboEntries = /// Reads an OBO file and returns a list of OboEntries. 
let fromFile verbose filepath = - System.IO.File.ReadAllLines filepath + IO.File.ReadAllLines filepath |> fromLines verbose \ No newline at end of file diff --git a/src/OBO.NET/OboOntology.fs b/src/OBO.NET/OboOntology.fs index d9b20bc..5fd7894 100644 --- a/src/OBO.NET/OboOntology.fs +++ b/src/OBO.NET/OboOntology.fs @@ -5,52 +5,190 @@ open DBXref //open OboTypeDef open FSharpAux +open FSharpAux.Regex open ARCtrl.ISA open System +open System.IO
 /// Ontology containing OBO Terms and OBO Type Defs (OBO 1.2).
 type OboOntology =
     {
-        Terms : OboTerm list
-        TypeDefs : OboTypeDef list
+        Terms : OboTerm list // `[Term]` stanzas
+        TypeDefs : OboTypeDef list // `[Typedef]` stanzas
+        FormatVersion : string // `format-version` header tag
+        DataVersion : string option // `data-version` (or `version`) header tag
+        Ontology : string option // `ontology` header tag
+        Date : DateTime option // `date` header tag
+        SavedBy : string option // `saved-by` header tag
+        AutoGeneratedBy : string option // `auto-generated-by` header tag
+        Subsetdefs : string list // `subsetdef` header tags
+        Imports : string list // `import` header tags; TODO: needs its own type (Record?)
+        Synonymtypedefs : string list // `synonymtypedef` header tags; TODO: rethink type, maybe create a parent type (Union? A Record might be better)
+        Idspaces : string list // `idspace` header tags; TODO: rethink as its own Record type
+        DefaultRelationshipIdPrefix : string option // `default-relationship-id-prefix` header tag
+        IdMappings : string list // `id-mapping` header tags; TODO: rethink: maybe a new record? or TermRelation?
+        Remarks : string list // `remark` header tags
+        TreatXrefsAsEquivalents : string list // `treat-xrefs-as-equivalent` header tags
+        TreatXrefsAsGenusDifferentias : string list // `treat-xrefs-as-genus-differentia` header tags; TODO: rethink: maybe a new record? or plain string option?
+        TreatXrefsAsRelationships : string list // `treat-xrefs-as-relationship` header tags; TODO: maybe better as its own (Record/Union?) type
+        TreatXrefsAsIsAs : string list // `treat-xrefs-as-is_a` header tags
+        RelaxUniqueIdentifierAssumptionForNamespaces : string list // `relax-unique-identifier-assumption-for-namespace` header tags
+        RelaxUniqueLabelAssumptionForNamespaces : string list // `relax-unique-label-assumption-for-namespace` header tags
     }
-    static member create terms typedefs =
+    /// Creates an OboOntology based on the given parameters.
+    /// Every value is passed positionally (curried), in the order in which the record fields are declared.
+    static member create terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idspaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces : OboOntology =
         {
-            Terms = terms
-            TypeDefs = typedefs
+            // Stanzas.
+            Terms = terms; TypeDefs = typedefs
+            // Header tags holding a single value.
+            FormatVersion = formatVersion; DataVersion = dataVersion; Ontology = ontology; Date = date
+            SavedBy = savedBy; AutoGeneratedBy = autoGeneratedBy
+            DefaultRelationshipIdPrefix = defaultRelationshipIdPrefix
+            // Header tags holding a list of values.
+            Subsetdefs = subsetdefs; Imports = imports; Synonymtypedefs = synonymtypedefs; Idspaces = idspaces
+            IdMappings = idMappings; Remarks = remarks
+            TreatXrefsAsEquivalents = treatXrefsAsEquivalents
+            TreatXrefsAsGenusDifferentias = treatXrefsAsGenusDifferentias
+            TreatXrefsAsRelationships = treatXrefsAsRelationships
+            TreatXrefsAsIsAs = treatXrefsAsIsAs
+            RelaxUniqueIdentifierAssumptionForNamespaces = relaxUniqueIdentifierAssumptionForNamespaces
+            RelaxUniqueLabelAssumptionForNamespaces = relaxUniqueLabelAssumptionForNamespaces
         }
-    /// Reads an OBO Ontology containing term and type def stanzas from lines.
+    /// Creates an OboOntology based on the given arguments.
+    /// Only terms, typedefs and formatVersion are mandatory: omitted single-valued header tags become None, omitted list-valued header tags become the empty list.
+    static member Create(terms, typedefs, formatVersion, ?DataVersion, ?Ontology, ?Date, ?SavedBy, ?AutoGeneratedBy, ?Subsetdefs, ?Imports, ?Synonymtypedefs, ?Idspaces, ?DefaultRelationshipIdPrefix, ?IdMappings, ?Remarks, ?TreatXrefsAsEquivalents, ?TreatXrefsAsGenusDifferentias, ?TreatXrefsAsRelationships, ?TreatXrefsAsIsAs, ?RelaxUniqueIdentifierAssumptionForNamespaces, ?RelaxUniqueLabelAssumptionForNamespaces) =
+        // A list-valued argument that was not supplied falls back to the empty list.
+        let orEmpty optionalList = Option.defaultValue [] optionalList
+        {
+            Terms = terms
+            TypeDefs = typedefs
+            FormatVersion = formatVersion
+            // Optional single values are stored exactly as received (Some value / None).
+            DataVersion = DataVersion
+            Ontology = Ontology
+            Date = Date
+            SavedBy = SavedBy
+            AutoGeneratedBy = AutoGeneratedBy
+            DefaultRelationshipIdPrefix = DefaultRelationshipIdPrefix
+            Subsetdefs = orEmpty Subsetdefs
+            Imports = orEmpty Imports
+            Synonymtypedefs = orEmpty Synonymtypedefs
+            Idspaces = orEmpty Idspaces
+            IdMappings = orEmpty IdMappings
+            Remarks = orEmpty Remarks
+            TreatXrefsAsEquivalents = orEmpty TreatXrefsAsEquivalents
+            TreatXrefsAsGenusDifferentias = orEmpty TreatXrefsAsGenusDifferentias
+            TreatXrefsAsRelationships = orEmpty TreatXrefsAsRelationships
+            TreatXrefsAsIsAs = orEmpty TreatXrefsAsIsAs
+            RelaxUniqueIdentifierAssumptionForNamespaces = orEmpty RelaxUniqueIdentifierAssumptionForNamespaces
+            RelaxUniqueLabelAssumptionForNamespaces = orEmpty RelaxUniqueLabelAssumptionForNamespaces
+        }
+
+    /// Reads an OBO Ontology containing document header tags, and term and type def stanzas from lines.
static member fromLines verbose (input : seq) = + let rxFormatVersion = createRegex RegexOptions.None @"format-version\s*:\s*(?.+)" + let rxDataVersion = createRegex RegexOptions.None @"(?:data-version|version)\s*:\s*(?.+)" + let rxOntology = createRegex RegexOptions.None @"ontology\s*:\s*(?.+)" + let rxDate = createRegex RegexOptions.None @"date\s*:\s*(?\d{2}:\d{2}:\d{4} \d{2}:\d{2})" + let rxSavedBy = createRegex RegexOptions.None @"saved-by\s*:\s*(?.+)" + let rxAutoGeneratedBy = createRegex RegexOptions.None @"auto-generated-by\s*:\s*(?.+)" + let rxSubsetdef = createRegex RegexOptions.None @"subsetdef\s*:\s*(?.+)" + let rxImport = createRegex RegexOptions.None @"import\s*:\s*(?.+)" + let rxSynonymtypedef = createRegex RegexOptions.None @"synonymtypedef\s*:\s*(?.+)" + let rxIdspace = createRegex RegexOptions.None @"idspace\s*:\s*(?.+)" + let rxDefaultRelationshipIdPrefix = createRegex RegexOptions.None @"default-relationship-id-prefix\s*:\s*(?.+)" + let rxIdMapping = createRegex RegexOptions.None @"id-mapping\s*:\s*(?.+)" + let rxRemark = createRegex RegexOptions.None @"remark\s*:\s*(?.+)" + let rxTreatXrefsAsEquivalent = createRegex RegexOptions.None @"treat-xrefs-as-equivalent\s*:\s*(?.+)" + let rxTreatXrefsAsGenusDifferentia = createRegex RegexOptions.None @"treat-xrefs-as-genus-differentia\s*:\s*(?.+)" + let rxTreatXrefsAsRelationship = createRegex RegexOptions.None @"treat-xrefs-as-relationship\s*:\s*(?.+)" + let rxTreatXrefsAsIsA = createRegex RegexOptions.None @"treat-xrefs-as-is_a\s*:\s*(?.+)" + let rxRelaxUniqueIdentifierAssumptionForNamespace = createRegex RegexOptions.None @"relax-unique-identifier-assumption-for-namespace\s*:\s*(?.+)" + let rxRelaxUniqueLabelAssumptionForNamespace = createRegex RegexOptions.None @"relax-unique-label-assumption-for-namespace\s*:\s*(?.+)" + let en = input.GetEnumerator() - let rec loop (en:System.Collections.Generic.IEnumerator) terms typedefs lineNumber = + + let rec loop (en : Collections.Generic.IEnumerator) terms 
typedefs formatVersion (dataVersion : string option) ontology (date : DateTime option) (savedBy : string option) (autoGeneratedBy : string option) subsetdefs imports synonymtypedefs idSpaces (defaultRelationshipIdPrefix : string option) idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces lineNumber = match en.MoveNext() with | true -> - match (en.Current |> trimComment) with + match trimComment en.Current with | "[Term]" -> let lineNumber,parsedTerm = OboTerm.fromLines verbose en lineNumber "" "" false [] "" "" [] [] [] [] [] [] [] [] false [] [] [] false "" "" - loop en (parsedTerm :: terms) typedefs lineNumber + loop en (parsedTerm :: terms) typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs treatXrefsAsRelationships treatXrefsAsGenusDifferentias relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces lineNumber | "[Typedef]" -> let lineNumber,parsedTypeDef = OboTypeDef.fromLines verbose en lineNumber "" "" "" "" [] [] false false false false false false false - loop en terms (parsedTypeDef :: typedefs) lineNumber - | _ -> loop en terms typedefs (lineNumber + 1) - | false -> OboOntology.create (List.rev terms) (List.rev typedefs) - - loop en [] [] 1 + loop en terms (parsedTypeDef :: typedefs) formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces lineNumber + | x when (rxFormatVersion.Match x).Success -> + if formatVersion <> "" then printfn "WARNING: Duplicate 
format-version in document header tags!" + loop en terms typedefs (rxFormatVersion.Match x).Groups["formatVersion"].Value dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxDataVersion.Match x).Success -> + if dataVersion.IsSome then printfn "WARNING: Duplicate data-version in document header tags!" + loop en terms typedefs formatVersion (Some (rxDataVersion.Match x).Groups["dataVersion"].Value) ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxOntology.Match x).Success -> + loop en terms typedefs formatVersion dataVersion ((rxOntology.Match x).Groups["ontology"].Value |> Some) date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxDate.Match x).Success -> + if date.IsSome then printfn "WARNING: Duplicate date in document header tags!" + let parsedDate = + try DateTime.ParseExact((rxDate.Match x).Groups["date"].Value, "dd:MM:yyyy HH:mm", Globalization.CultureInfo.InvariantCulture) |> Some with + _ -> + printfn "ERROR: Inproper date given!" 
+ None + loop en terms typedefs formatVersion dataVersion ontology parsedDate savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxSavedBy.Match x).Success -> + if savedBy.IsSome then printfn "WARNING: Duplicate saved-by in document header tags!" + loop en terms typedefs formatVersion dataVersion ontology date ((rxSavedBy.Match x).Groups["savedBy"].Value |> Some) autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxAutoGeneratedBy.Match x).Success -> + if autoGeneratedBy.IsSome then printfn "WARNING: Duplicate auto-generated-by in document header tags!" 
+ loop en terms typedefs formatVersion dataVersion ontology date savedBy ((rxAutoGeneratedBy.Match x).Groups["autoGeneratedBy"].Value |> Some) subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxSubsetdef.Match x).Success -> + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy ((rxSubsetdef.Match x).Groups["subsetdef"].Value :: subsetdefs) imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxImport.Match x).Success -> + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs ((rxImport.Match x).Groups["import"].Value :: imports) synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxSynonymtypedef.Match x).Success -> + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports ((rxSynonymtypedef.Match x).Groups["synonymtypedef"].Value :: synonymtypedefs) idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxIdspace.Match x).Success -> + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy 
subsetdefs imports synonymtypedefs ((rxIdspace.Match x).Groups["idspace"].Value :: idSpaces) defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxDefaultRelationshipIdPrefix.Match x).Success -> + if defaultRelationshipIdPrefix.IsSome then printfn "WARNING: Duplicate default-relationship-id-prefix in document header tags!" + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces ((rxDefaultRelationshipIdPrefix.Match x).Groups["defaultRelationshipIdPrefix"].Value |> Some) idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxIdMapping.Match x).Success -> + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix ((rxIdMapping.Match x).Groups["idMapping"].Value :: idMappings) remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxRemark.Match x).Success -> + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings ((rxRemark.Match x).Groups["remark"].Value :: remarks) treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxTreatXrefsAsEquivalent.Match x).Success -> + loop en terms 
typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks ((rxTreatXrefsAsEquivalent.Match x).Groups["treatXrefsAsEquivalent"].Value :: treatXrefsAsEquivalents) treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxTreatXrefsAsGenusDifferentia.Match x).Success -> + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents ((rxTreatXrefsAsGenusDifferentia.Match x).Groups["treatXrefsAsGenusDifferentia"].Value :: treatXrefsAsGenusDifferentias) treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxTreatXrefsAsRelationship.Match x).Success -> + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias ((rxTreatXrefsAsRelationship.Match x).Groups["treatXrefsAsRelationship"].Value :: treatXrefsAsRelationships) treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxTreatXrefsAsIsA.Match x).Success -> + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships ((rxTreatXrefsAsIsA.Match x).Groups["treatXrefsAsIsA"].Value :: treatXrefsAsIsAs) relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x
when (rxRelaxUniqueIdentifierAssumptionForNamespace.Match x).Success -> + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs ((rxRelaxUniqueIdentifierAssumptionForNamespace.Match x).Groups["relaxUniqueIdentifierAssumptionForNamespace"].Value :: relaxUniqueIdentifierAssumptionForNamespaces) relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | x when (rxRelaxUniqueLabelAssumptionForNamespace.Match x).Success -> + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces ((rxRelaxUniqueLabelAssumptionForNamespace.Match x).Groups["relaxUniqueLabelAssumptionForNamespace"].Value :: relaxUniqueLabelAssumptionForNamespaces) (lineNumber + 1) + | _ -> + loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1) + | false -> + OboOntology.create (List.rev terms) (List.rev typedefs) formatVersion dataVersion ontology date savedBy autoGeneratedBy (List.rev subsetdefs) (List.rev imports) (List.rev synonymtypedefs) (List.rev idSpaces) defaultRelationshipIdPrefix (List.rev idMappings) (List.rev remarks) (List.rev treatXrefsAsEquivalents) (List.rev treatXrefsAsGenusDifferentias) (List.rev treatXrefsAsRelationships) (List.rev treatXrefsAsIsAs) (List.rev 
relaxUniqueIdentifierAssumptionForNamespaces) (List.rev relaxUniqueLabelAssumptionForNamespaces) + + loop en [] [] String.Empty None None None None None [] [] [] [] None [] [] [] [] [] [] [] [] 0 // was 1 before /// Reads an OBO Ontology containing term and type def stanzas from a file with the given path. static member fromFile verbose (path : string) = - System.IO.File.ReadAllLines path + File.ReadAllLines path |> OboOntology.fromLines verbose - /// Takes a list of OboEntries and returns the OboOntology based on it. - static member fromOboEntries entries = + /// Takes a list of OboEntries and the document header tags and returns the OboOntology based on them. + static member fromOboEntries formatVersion (dataVersion : string option) ontology (date : DateTime option) (savedBy : string option) (autoGeneratedBy : string option) subsetdefs imports synonymtypedefs idSpaces (defaultRelationshipIdPrefix : string option) idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces entries = let rec loop terms typedefs entries = match entries with @@ -62,7 +200,12 @@ type OboOntology = let terms, typedefs = loop [] [] entries - OboOntology.create terms typedefs + OboOntology.create terms typedefs formatVersion dataVersion ontology (date : DateTime option) (savedBy : string option) (autoGeneratedBy : string option) subsetdefs imports synonymtypedefs idSpaces (defaultRelationshipIdPrefix : string option) idMappings remarks treatXrefsAsEquivalents treatXrefsAsGenusDifferentias treatXrefsAsRelationships treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces + + /// Takes a list of OboEntries and the document header tags and returns the OboOntology based on them. 
+ static member FromOboEntries(entries, formatVersion, ?DataVersion, ?Ontology, ?Date, ?SavedBy, ?AutoGeneratedBy, ?Subsetdefs, ?Imports, ?Synonymtypedefs, ?Idspaces, ?DefaultRelationshipIdPrefix, ?IdMappings, ?Remarks, ?TreatXrefsAsEquivalents, ?TreatXrefsAsGenusDifferentias, ?TreatXrefsAsRelationships, ?TreatXrefsAsIsAs, ?RelaxUniqueIdentifierAssumptionForNamespaces, ?RelaxUniqueLabelAssumptionForNamespaces) = + //let subsetdefs = match Subsetdefs with None -> [] | _ -> Subsetdefs.Value + OboOntology.fromOboEntries formatVersion DataVersion Ontology Date SavedBy AutoGeneratedBy (defaultArg Subsetdefs []) (defaultArg Imports []) (defaultArg Synonymtypedefs []) (defaultArg Idspaces []) DefaultRelationshipIdPrefix (defaultArg IdMappings []) (defaultArg Remarks []) (defaultArg TreatXrefsAsEquivalents []) (defaultArg TreatXrefsAsGenusDifferentias []) (defaultArg TreatXrefsAsRelationships []) (defaultArg TreatXrefsAsIsAs []) (defaultArg RelaxUniqueIdentifierAssumptionForNamespaces []) (defaultArg RelaxUniqueLabelAssumptionForNamespaces []) entries /// Writes an OBO Ontology to term and type def stanzas in line form. static member toLines (oboOntology : OboOntology) = @@ -80,7 +223,7 @@ type OboOntology = /// Writes an OBO Ontology to term and type def stanzas to a file in the given path. static member toFile (path : string) (oboOntology : OboOntology) = - System.IO.File.WriteAllLines(path, OboOntology.toLines oboOntology) + File.WriteAllLines(path, OboOntology.toLines oboOntology) /// Writes an OBO Ontology to term and type def stanzas in line form. 
member this.ToLines() = diff --git a/tests/OBO.NET.Tests/OBO.NET.Tests.fsproj b/tests/OBO.NET.Tests/OBO.NET.Tests.fsproj index 158c8a4..1883492 100644 --- a/tests/OBO.NET.Tests/OBO.NET.Tests.fsproj +++ b/tests/OBO.NET.Tests/OBO.NET.Tests.fsproj @@ -9,6 +9,9 @@ + + + @@ -29,4 +32,6 @@ + + diff --git a/tests/OBO.NET.Tests/OboOntology.Tests.fs b/tests/OBO.NET.Tests/OboOntology.Tests.fs index 34f1694..9003a66 100644 --- a/tests/OBO.NET.Tests/OboOntology.Tests.fs +++ b/tests/OBO.NET.Tests/OboOntology.Tests.fs @@ -1,14 +1,19 @@ namespace OBO.NET.Tests -open Expecto open OBO.NET +open Expecto + +open System +open System.IO + module OboOntologyTests = [] let oboOntologyTest = testList "OboOntology" [ + let testTerm1 = OboTerm.Create( "id:1", @@ -39,19 +44,103 @@ module OboOntologyTests = Name = "testTerm5", Synonyms = [TermSynonym.parseSynonym None 0 "\"testTerm1\" EXACT []"; TermSynonym.parseSynonym None 1 "\"testTerm2\" BROAD []"; TermSynonym.parseSynonym None 2 "\"testTerm0\" NARROW []"] ) - let testOntology = OboOntology.create [testTerm1; testTerm2; testTerm3; testTerm4; testTerm5] [] + + let testFile1Path = Path.Combine(__SOURCE_DIRECTORY__, "References", "CorrectHeaderTags.obo") + let testFile2Path = Path.Combine(__SOURCE_DIRECTORY__, "References", "IncorrectHeaderTags.obo") + let testFile3Path = Path.Combine(__SOURCE_DIRECTORY__, "References", "DuplicateHeaderTags.obo") + let testFile1 = try OboOntology.fromFile false testFile1Path |> Some with _ -> None + let testFile2 = try OboOntology.fromFile false testFile2Path |> Some with _ -> None + let testFile3 = try OboOntology.fromFile false testFile3Path |> Some with _ -> None + + testList "fromFile" [ + testCase "can read files" <| fun _ -> + Expect.isSome testFile1 $"Could not read testFile1: {testFile1Path}" + Expect.isSome testFile2 $"Could not read testFile2: {testFile2Path}" + Expect.isSome testFile3 $"Could not read testFile3: {testFile3Path}" + testCase "reads correct headers correctly" <| fun _ -> + let 
formatVersionActual = Option.map (fun o -> o.FormatVersion) testFile1 + let dataVersionActual = Option.map (fun o -> o.DataVersion) testFile1 |> Option.flatten + let ontologyActual = Option.map (fun o -> o.Ontology) testFile1 |> Option.flatten + let dateActual = Option.map (fun o -> o.Date) testFile1 |> Option.flatten + let savedByActual = Option.map (fun o -> o.SavedBy) testFile1 |> Option.flatten + let autoGeneratedByActual = Option.map (fun o -> o.AutoGeneratedBy) testFile1 |> Option.flatten + let subsetdefsActual = Option.map (fun o -> o.Subsetdefs) testFile1 + let importsActual = Option.map (fun o -> o.Imports) testFile1 + let synonymtypedefsActual = Option.map (fun o -> o.Synonymtypedefs) testFile1 + let idSpacesActual = Option.map (fun o -> o.Idspaces) testFile1 + let defaultRelationshipIdPrefixActual = Option.map (fun o -> o.DefaultRelationshipIdPrefix) testFile1 |> Option.flatten + let idMappingsActual = Option.map (fun o -> o.IdMappings) testFile1 + let remarksActual = Option.map (fun o -> o.Remarks) testFile1 + let treatXrefsAsEquivalentsActual = Option.map (fun o -> o.TreatXrefsAsEquivalents) testFile1 + let treatXrefsAsGenusDifferentiasActual = Option.map (fun o -> o.TreatXrefsAsGenusDifferentias) testFile1 + let treatXrefsAsRelationshipsActual = Option.map (fun o -> o.TreatXrefsAsRelationships) testFile1 + let treatXrefsAsIsAsActual = Option.map (fun o -> o.TreatXrefsAsIsAs) testFile1 + let relaxUniqueIdentifierAssumptionForNamespacesActual = Option.map (fun o -> o.RelaxUniqueIdentifierAssumptionForNamespaces) testFile1 + let relaxUniqueLabelAssumptionForNamespacesActual = Option.map (fun o -> o.RelaxUniqueLabelAssumptionForNamespaces) testFile1 + let formatVersionExpected = "0.0.1" |> Some + let dataVersionExpected = "0.0.1" |> Some + let ontologyExpected = "CL" |> Some + let dateExpected = DateTime(1970, 1, 1, 0, 0, 0) |> Some + let savedByExpected = "Oliver Maus" |> Some + let autoGeneratedByExpected = "TalkGPT" |> Some + let subsetdefsExpected = 
["GO_SLIM \"GO Slim\""; "GO_BASIC \"GO Basic\""] |> Some + let importsExpected = ["http://purl.obolibrary.org/obo/go.owl"; "http://purl.obolibrary.org/obo/cl.owl"] |> Some + let synonymtypedefsExpected = ["UK_SPELLING \"British spelling\" EXACT"; "US_SPELLING \"American spelling\" EXACT"] |> Some + let idspacesExpected = ["GO urn:lsid:bioontology.org:GO: \"gene ontology terms\""; "GO urn:lsid:bioontology.org:GO: \"gene ontology types\""] |> Some + let defaultRelationshipIdPrefixExpected = "OBO_REL" |> Some + let idMappingsExpected = ["part_of OBO_REL:part_of"; "has_a OBO_REL:has_a"] |> Some + let remarksExpected = ["test1"; "test2"] |> Some + let treatXrefsAsEquivalentExpected = ["CL"; "GO"] |> Some + let treatXrefsAsGenusDifferentiaExpected = ["CL part_of NCBITaxon:7955"; "CL part_of NCBITaxon:7956"] |> Some + let treatXrefsAsRelationshipExpected = ["MA homologous_to"; "MA analogous_to"] |> Some + let treatXrefsAsIsAExpected = ["CL"; "GO"] |> Some + let relaxUniqueIdentifierAssumptionForNamespaceExpected = ["my_combined_ontology"; "my_combined_ontology2"] |> Some + let relaxUniqueLabelAssumptionForNamespaceExpected = ["my_combined_ontology"; "my_combined_ontology2"] |> Some + Expect.equal formatVersionActual formatVersionExpected "format-version is not identical" + Expect.equal dataVersionActual dataVersionExpected "data-version is not identical" + Expect.equal ontologyActual ontologyExpected "ontology is not identical" + Expect.equal dateActual dateExpected "date is not identical" + Expect.equal savedByActual savedByExpected "saved-by is not identical" + Expect.equal autoGeneratedByActual autoGeneratedByExpected "auto-generated-by is not identical" + Expect.equal subsetdefsActual subsetdefsExpected "subsetdefs is not identical" + Expect.equal importsActual importsExpected "imports are not identical" + Expect.equal synonymtypedefsActual synonymtypedefsExpected "synonymtypedefs are not identical" + Expect.equal idSpacesActual idspacesExpected "idspaces are not 
identical" + Expect.equal defaultRelationshipIdPrefixActual defaultRelationshipIdPrefixExpected "default-relationship-id-prefix is not identical" + Expect.equal idMappingsActual idMappingsExpected "id-mappings are not identical" + Expect.equal remarksActual remarksExpected "remarks are not identical" + Expect.equal treatXrefsAsEquivalentsActual treatXrefsAsEquivalentExpected "treat-xrefs-as-equivalents are not identical" + Expect.equal treatXrefsAsGenusDifferentiasActual treatXrefsAsGenusDifferentiaExpected "treat-xrefs-as-genus-differentia are not identical" + Expect.equal treatXrefsAsRelationshipsActual treatXrefsAsRelationshipExpected "treat-xrefs-as-relationships are not identical" + Expect.equal treatXrefsAsIsAsActual treatXrefsAsIsAExpected "treat-xrefs-as-is-a are not identical" + Expect.equal relaxUniqueIdentifierAssumptionForNamespacesActual relaxUniqueIdentifierAssumptionForNamespaceExpected "relax-unique-identifier-assumption-for-namespaces are not identical" + Expect.equal relaxUniqueLabelAssumptionForNamespacesActual relaxUniqueLabelAssumptionForNamespaceExpected "relax-unique-label-assumption-for-namespaces are not identical" + testCase "reads incorrect headers correctly" <| fun _ -> + Expect.isNone (Option.map (fun o -> o.Date) testFile2 |> Option.flatten) "Date should be missing but was still parsed" + testCase "reads Terms correctly" <| fun _ -> + let termsExpected = List.init 2 (fun i -> OboTerm.Create $"Test:000{i + 1}") |> Some + Expect.equal (Option.map (fun o -> o.Terms) testFile1) termsExpected "Terms did not match" + testCase "reads Typedefs correctly" <| fun _ -> + let typedefsExpected = List.init 2 (fun i -> OboTypeDef.Create($"Test:000{i + 3}", "", "")) |> Some + Expect.equal (Option.map (fun o -> o.TypeDefs) testFile1) typedefsExpected "Typedefs did not match" + ] + + let testOntology = OboOntology.Create([testTerm1; testTerm2; testTerm3; testTerm4; testTerm5], [], "") + testList "GetRelatedTerms" [ testCase "returns correct related terms"
<| fun _ -> let actual = testOntology.GetRelatedTerms(testTerm1) let expected = [testTerm1, "related_to", Some testTerm2; testTerm1, "unrelated_to", Some testTerm3; testTerm1, "antirelated_to", None] Expect.sequenceEqual actual expected "is not equal" ] + testList "GetIsAs" [ testCase "returns correct related terms" <| fun _ -> let actual = testOntology.GetIsAs testTerm3 let expected = [testTerm3, Some testTerm1; testTerm3, Some testTerm2] Expect.sequenceEqual actual expected "is not equal" ] + testList "GetRelations" [ testCase "returns correct TermRelations" <| fun _ -> let actual = testOntology.GetRelations() @@ -68,12 +157,14 @@ module OboOntologyTests = ] Expect.sequenceEqual actual expected "is not equal" ] + testList "GetSynonyms" [ testCase "returns correct synonymous terms" <| fun _ -> let actual = testOntology.GetSynonyms testTerm5 let expected = seq {Exact, testTerm5, testTerm1; Broad, testTerm5, testTerm2} Expect.sequenceEqual actual expected "is not equal" ] + testList "TryGetSynonyms" [ testCase "returns correct synonymous terms" <| fun _ -> let actual = testOntology.TryGetSynonyms testTerm5 diff --git a/tests/OBO.NET.Tests/References/CorrectHeaderTags.obo b/tests/OBO.NET.Tests/References/CorrectHeaderTags.obo new file mode 100644 index 0000000..acea8f3 --- /dev/null +++ b/tests/OBO.NET.Tests/References/CorrectHeaderTags.obo @@ -0,0 +1,47 @@ +format-version: 0.0.1 +data-version: 0.0.1 +ontology: CL +date: 01:01:1970 00:00 +saved-by: Oliver Maus +auto-generated-by: TalkGPT +subsetdef: GO_SLIM "GO Slim" +subsetdef: GO_BASIC "GO Basic" +import: http://purl.obolibrary.org/obo/go.owl +import: http://purl.obolibrary.org/obo/cl.owl +synonymtypedef: UK_SPELLING "British spelling" EXACT +synonymtypedef: US_SPELLING "American spelling" EXACT +idspace: GO urn:lsid:bioontology.org:GO: "gene ontology terms" +idspace: GO urn:lsid:bioontology.org:GO: "gene ontology types" +default-relationship-id-prefix: OBO_REL +id-mapping: part_of OBO_REL:part_of +id-mapping: 
has_a OBO_REL:has_a +remark: test1 +remark: test2 +treat-xrefs-as-equivalent: CL +treat-xrefs-as-equivalent: GO +treat-xrefs-as-genus-differentia: CL part_of NCBITaxon:7955 +treat-xrefs-as-genus-differentia: CL part_of NCBITaxon:7956 +treat-xrefs-as-relationship: MA homologous_to +treat-xrefs-as-relationship: MA analogous_to +treat-xrefs-as-is_a: CL +treat-xrefs-as-is_a: GO +relax-unique-identifier-assumption-for-namespace: my_combined_ontology +relax-unique-identifier-assumption-for-namespace: my_combined_ontology2 +relax-unique-label-assumption-for-namespace: my_combined_ontology +relax-unique-label-assumption-for-namespace: my_combined_ontology2 + +[Term] +id: Test:0001 + +[Term] +id: Test:0002 + +[Typedef] +id: Test:0003 +name: +range: + +[Typedef] +id: Test:0004 +name: +range: \ No newline at end of file diff --git a/tests/OBO.NET.Tests/References/DuplicateHeaderTags.obo b/tests/OBO.NET.Tests/References/DuplicateHeaderTags.obo new file mode 100644 index 0000000..1ebe98f --- /dev/null +++ b/tests/OBO.NET.Tests/References/DuplicateHeaderTags.obo @@ -0,0 +1,28 @@ +format-version: 0.0.1 +format-version: 0.0.2 +data-version: 0.0.1 +data-version: 0.0.2 +date: 01:01:1970 00:00 +date: 01:01:1970 00:01 +saved-by: Oliver Maus +saved-by: Loliver Laus +auto-generated-by: TalkGPT +auto-generated-by: SpeakGPT +default-relationship-id-prefix: OBO_REL +default-relationship-id-prefix: BABO_REL + +[Term] +id: Test:0001 + +[Term] +id: Test:0002 + +[Typedef] +id: Test:0003 +name: +range: + +[Typedef] +id: Test:0004 +name: +range: \ No newline at end of file diff --git a/tests/OBO.NET.Tests/References/IncorrectHeaderTags.obo b/tests/OBO.NET.Tests/References/IncorrectHeaderTags.obo new file mode 100644 index 0000000..f1ba959 --- /dev/null +++ b/tests/OBO.NET.Tests/References/IncorrectHeaderTags.obo @@ -0,0 +1,17 @@ +date: 32:14:1970 25:61 + +[Term] +id: Test:0001 + +[Term] +id: Test:0002 + +[Typedef] +id: Test:0003 +name: +range: + +[Typedef] +id: Test:0004 +name: +range: \ No 
newline at end of file