diff options
Diffstat (limited to 'rdfloader/parser2v2/utils.go')
-rw-r--r-- | rdfloader/parser2v2/utils.go | 160 |
1 files changed, 160 insertions, 0 deletions
diff --git a/rdfloader/parser2v2/utils.go b/rdfloader/parser2v2/utils.go new file mode 100644 index 0000000..e0c3557 --- /dev/null +++ b/rdfloader/parser2v2/utils.go @@ -0,0 +1,160 @@ +package parser2v2 + +import ( + "fmt" + gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + urilib "github.com/RishabhBhatnagar/gordf/uri" + "github.com/spdx/tools-golang/spdx" + "regexp" + "strings" +) + +func getLastPartOfURI(uri string) string { + if strings.Contains(uri, "#") { + parts := strings.Split(uri, "#") + return parts[len(parts)-1] + } + parts := strings.Split(uri, "/") + return parts[len(parts)-1] +} + +func stripLastPartOfUri(uri string) string { + lastPart := getLastPartOfURI(uri) + uri = strings.TrimSuffix(uri, lastPart) + return uri +} + +func stripJoiningChars(uri string) string { + return strings.TrimSuffix(strings.TrimSuffix(uri, "/"), "#") +} + +func isUriSame(uri1, uri2 string) bool { + return stripJoiningChars(uri1) == stripJoiningChars(uri2) +} + +func (parser *rdfParser2_2) filterAllTriplesByString(subject, predicate, object string) (retTriples []*gordfParser.Triple) { + for _, triple := range parser.gordfParserObj.Triples { + if triple.Subject.ID == subject && triple.Predicate.ID == predicate && triple.Object.ID == object { + retTriples = append(retTriples, triple) + } + } + return retTriples +} + +func (parser *rdfParser2_2) filterTriplesByRegex(triples []*gordfParser.Triple, subject, predicate, object string) (retTriples []*gordfParser.Triple, err error) { + var subjectCompiled, objectCompiled, predicateCompiled *regexp.Regexp + subjectCompiled, err = regexp.Compile(subject) + if err != nil { + return + } + predicateCompiled, err = regexp.Compile(predicate) + if err != nil { + return + } + objectCompiled, err = regexp.Compile(object) + if err != nil { + return + } + for _, triple := range triples { + if subjectCompiled.MatchString(triple.Subject.ID) && predicateCompiled.MatchString(triple.Predicate.ID) && objectCompiled.MatchString(triple.Object.ID) { + retTriples = append(retTriples, triple) + } + } + return +} + +func isUriValid(uri string) bool { + _, err := urilib.NewURIRef(uri) + return err == nil +} + + +// Function Below this line is taken from the tvloader/parser2v2/utils.go + +// used to extract DocumentRef and SPDXRef values from an SPDX Identifier +// which can point either to this document or to a different one +func ExtractDocElementID(value string) (spdx.DocElementID, error) { + docRefID := "" + idStr := value + + // check prefix to see if it's a DocumentRef ID + if strings.HasPrefix(idStr, "DocumentRef-") { + // extract the part that comes between "DocumentRef-" and ":" + strs := strings.Split(idStr, ":") + // should be exactly two, part before and part after + if len(strs) < 2 { + return spdx.DocElementID{}, fmt.Errorf("no colon found although DocumentRef- prefix present") + } + if len(strs) > 2 { + return spdx.DocElementID{}, fmt.Errorf("more than one colon found") + } + + // trim the prefix and confirm non-empty + docRefID = strings.TrimPrefix(strs[0], "DocumentRef-") + if docRefID == "" { + return spdx.DocElementID{}, fmt.Errorf("document identifier has nothing after prefix") + } + // and use remainder for element ID parsing + idStr = strs[1] + } + + // check prefix to confirm it's got the right prefix for element IDs + if !strings.HasPrefix(idStr, "SPDXRef-") { + return spdx.DocElementID{}, fmt.Errorf("missing SPDXRef- prefix for element identifier") + } + + // make sure no colons are present + if strings.Contains(idStr, ":") { + // we know this means there was no DocumentRef- prefix, because + // we would have handled multiple colons above if it was + return spdx.DocElementID{}, fmt.Errorf("invalid colon in element identifier") + } + + // trim the prefix and confirm non-empty + eltRefID := strings.TrimPrefix(idStr, "SPDXRef-") + if eltRefID == "" { + return spdx.DocElementID{}, fmt.Errorf("element identifier has nothing after prefix") + } + + // we're good + return spdx.DocElementID{DocumentRefID: docRefID, ElementRefID: spdx.ElementID(eltRefID)}, nil +} + +// used to extract SPDXRef values only from an SPDX Identifier which can point +// to this document only. Use extractDocElementID for parsing IDs that can +// refer either to this document or a different one. +func ExtractElementID(value string) (spdx.ElementID, error) { + // check prefix to confirm it's got the right prefix for element IDs + if !strings.HasPrefix(value, "SPDXRef-") { + return spdx.ElementID(""), fmt.Errorf("missing SPDXRef- prefix for element identifier") + } + + // make sure no colons are present + if strings.Contains(value, ":") { + return spdx.ElementID(""), fmt.Errorf("invalid colon in element identifier") + } + + // trim the prefix and confirm non-empty + eltRefID := strings.TrimPrefix(value, "SPDXRef-") + if eltRefID == "" { + return spdx.ElementID(""), fmt.Errorf("element identifier has nothing after prefix") + } + + // we're good + return spdx.ElementID(eltRefID), nil +} + +// used to extract key / value from embedded substrings +// returns subkey, subvalue, nil if no error, or "", "", error otherwise +func ExtractSubs(value string, sep string) (string, string, error) { + // parse the value to see if it's a valid subvalue format + sp := strings.SplitN(value, sep, 2) + if len(sp) == 1 { + return "", "", fmt.Errorf("invalid subvalue format for %s (no %s found)", value, sep) + } + + subkey := strings.TrimSpace(sp[0]) + subvalue := strings.TrimSpace(sp[1]) + + return subkey, subvalue, nil +}
\ No newline at end of file |