From a47f1698a1fdd2360c661440e8925e84cc971263 Mon Sep 17 00:00:00 2001 From: Steve Winslow Date: Sun, 8 Nov 2020 15:47:38 -0500 Subject: Fix parsing for expanded external document refs Signed-off-by: Steve Winslow --- tvloader/parser2v1/parse_creation_info.go | 71 ++++++++++++++++- tvloader/parser2v1/parse_creation_info_test.go | 104 +++++++++++++++++++++++-- tvloader/parser2v2/parse_creation_info.go | 71 ++++++++++++++++- tvloader/parser2v2/parse_creation_info_test.go | 104 +++++++++++++++++++++++-- 4 files changed, 336 insertions(+), 14 deletions(-) (limited to 'tvloader') diff --git a/tvloader/parser2v1/parse_creation_info.go b/tvloader/parser2v1/parse_creation_info.go index b02b5c0..44d1dfc 100644 --- a/tvloader/parser2v1/parse_creation_info.go +++ b/tvloader/parser2v1/parse_creation_info.go @@ -4,6 +4,7 @@ package parser2v1 import ( "fmt" + "strings" "github.com/spdx/tools-golang/spdx" ) @@ -16,7 +17,9 @@ func (parser *tvParser2_1) parsePairFromCreationInfo2_1(tag string, value string // create an SPDX Creation Info data struct if we don't have one already if parser.doc.CreationInfo == nil { - parser.doc.CreationInfo = &spdx.CreationInfo2_1{} + parser.doc.CreationInfo = &spdx.CreationInfo2_1{ + ExternalDocumentReferences: map[string]spdx.ExternalDocumentRef2_1{}, + } } ci := parser.doc.CreationInfo @@ -32,7 +35,17 @@ func (parser *tvParser2_1) parsePairFromCreationInfo2_1(tag string, value string case "DocumentNamespace": ci.DocumentNamespace = value case "ExternalDocumentRef": - ci.ExternalDocumentReferences = append(ci.ExternalDocumentReferences, value) + documentRefID, uri, alg, checksum, err := extractExternalDocumentReference(value) + if err != nil { + return err + } + edr := spdx.ExternalDocumentRef2_1{ + DocumentRefID: documentRefID, + URI: uri, + Alg: alg, + Checksum: checksum, + } + ci.ExternalDocumentReferences[documentRefID] = edr case "LicenseListVersion": ci.LicenseListVersion = value case "Creator": @@ -105,3 +118,57 @@ func (parser *tvParser2_1) parsePairFromCreationInfo2_1(tag string, value string return nil } + +// ===== Helper functions ===== + +func extractExternalDocumentReference(value string) (string, string, string, string, error) { + sp := strings.Split(value, " ") + // remove any that are just whitespace + keepSp := []string{} + for _, s := range sp { + ss := strings.TrimSpace(s) + if ss != "" { + keepSp = append(keepSp, ss) + } + } + + var documentRefID, uri, alg, checksum string + + // now, should have 4 items (or 3, if Alg and Checksum were joined) + // and should be able to map them + if len(keepSp) == 4 { + documentRefID = keepSp[0] + uri = keepSp[1] + alg = keepSp[2] + // check that colon is present for alg, and remove it + if !strings.HasSuffix(alg, ":") { + return "", "", "", "", fmt.Errorf("algorithm does not end with colon") + } + alg = strings.TrimSuffix(alg, ":") + checksum = keepSp[3] + } else if len(keepSp) == 3 { + documentRefID = keepSp[0] + uri = keepSp[1] + // split on colon into alg and checksum + parts := strings.SplitN(keepSp[2], ":", 2) + if len(parts) != 2 { + return "", "", "", "", fmt.Errorf("missing colon separator between algorithm and checksum") + } + alg = parts[0] + checksum = parts[1] + } else { + return "", "", "", "", fmt.Errorf("expected 4 elements, got %d", len(keepSp)) + } + + // additionally, we should be able to parse the first element as a + // DocumentRef- ID string, and we should remove that prefix + if !strings.HasPrefix(documentRefID, "DocumentRef-") { + return "", "", "", "", fmt.Errorf("expected first element to have DocumentRef- prefix") + } + documentRefID = strings.TrimPrefix(documentRefID, "DocumentRef-") + if documentRefID == "" { + return "", "", "", "", fmt.Errorf("document identifier has nothing after prefix") + } + + return documentRefID, uri, alg, checksum, nil +} diff --git a/tvloader/parser2v1/parse_creation_info_test.go b/tvloader/parser2v1/parse_creation_info_test.go index f21cfd1..c8741de 100644 --- a/tvloader/parser2v1/parse_creation_info_test.go +++ b/tvloader/parser2v1/parse_creation_info_test.go @@ -242,7 +242,19 @@ func TestParser2_1CanParseCreationInfoTags(t *testing.T) { // External Document Reference refs := []string{ "DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", - "DocumentRef-xyz-2.1.2 http://example.com/xyz-2.1.2 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2760", + "DocumentRef-xyz-2.1.2 http://example.com/xyz-2.1.2 SHA1:d6a770ba38583ed4bb4525bd96e50461655d2760", + } + wantRef0 := spdx.ExternalDocumentRef2_1{ + DocumentRefID: "spdx-tool-1.2", + URI: "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301", + Alg: "SHA1", + Checksum: "d6a770ba38583ed4bb4525bd96e50461655d2759", + } + wantRef1 := spdx.ExternalDocumentRef2_1{ + DocumentRefID: "xyz-2.1.2", + URI: "http://example.com/xyz-2.1.2", + Alg: "SHA1", + Checksum: "d6a770ba38583ed4bb4525bd96e50461655d2760", } err = parser.parsePairFromCreationInfo2_1("ExternalDocumentRef", refs[0]) if err != nil { @@ -252,10 +264,22 @@ func TestParser2_1CanParseCreationInfoTags(t *testing.T) { if err != nil { t.Errorf("expected nil error, got %v", err) } - if len(parser.doc.CreationInfo.ExternalDocumentReferences) != 2 || - parser.doc.CreationInfo.ExternalDocumentReferences[0] != refs[0] || - parser.doc.CreationInfo.ExternalDocumentReferences[1] != refs[1] { - t.Errorf("got %v for ExternalDocumentReferences", parser.doc.CreationInfo.ExternalDocumentReferences) + if len(parser.doc.CreationInfo.ExternalDocumentReferences) != 2 { + t.Errorf("got %d ExternalDocumentReferences, expected %d", len(parser.doc.CreationInfo.ExternalDocumentReferences), 2) + } + gotRef0 := parser.doc.CreationInfo.ExternalDocumentReferences["spdx-tool-1.2"] + if gotRef0.DocumentRefID != wantRef0.DocumentRefID || + gotRef0.URI != wantRef0.URI || + gotRef0.Alg != wantRef0.Alg || + gotRef0.Checksum != wantRef0.Checksum { + t.Errorf("got %#v for ExternalDocumentReferences[0], wanted %#v", gotRef0, wantRef0) + } + gotRef1 := parser.doc.CreationInfo.ExternalDocumentReferences["xyz-2.1.2"] + if gotRef1.DocumentRefID != wantRef1.DocumentRefID || + gotRef1.URI != wantRef1.URI || + gotRef1.Alg != wantRef1.Alg || + gotRef1.Checksum != wantRef1.Checksum { + t.Errorf("got %#v for ExternalDocumentReferences[1], wanted %#v", gotRef1, wantRef1) } // License List Version @@ -429,3 +453,73 @@ func TestParser2_1CICreatesAnnotation(t *testing.T) { t.Errorf("pointer to new Annotation doesn't match idx 0 for doc.Annotations[]") } } + +// ===== Helper function tests ===== + +func TestCanExtractExternalDocumentReference(t *testing.T) { + refstring := "DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1:d6a770ba38583ed4bb4525bd96e50461655d2759" + wantDocumentRefID := "spdx-tool-1.2" + wantURI := "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301" + wantAlg := "SHA1" + wantChecksum := "d6a770ba38583ed4bb4525bd96e50461655d2759" + + gotDocumentRefID, gotURI, gotAlg, gotChecksum, err := extractExternalDocumentReference(refstring) + if err != nil { + t.Errorf("got non-nil error: %v", err) + } + if wantDocumentRefID != gotDocumentRefID { + t.Errorf("wanted document ref ID %s, got %s", wantDocumentRefID, gotDocumentRefID) + } + if wantURI != gotURI { + t.Errorf("wanted URI %s, got %s", wantURI, gotURI) + } + if wantAlg != gotAlg { + t.Errorf("wanted alg %s, got %s", wantAlg, gotAlg) + } + if wantChecksum != gotChecksum { + t.Errorf("wanted checksum %s, got %s", wantChecksum, gotChecksum) + } +} + +func TestCanExtractExternalDocumentReferenceWithExtraWhitespace(t *testing.T) { + refstring := " DocumentRef-spdx-tool-1.2 \t http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 \t SHA1: \t d6a770ba38583ed4bb4525bd96e50461655d2759" + wantDocumentRefID := "spdx-tool-1.2" + wantURI := "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301" + wantAlg := "SHA1" + wantChecksum := "d6a770ba38583ed4bb4525bd96e50461655d2759" + + gotDocumentRefID, gotURI, gotAlg, gotChecksum, err := extractExternalDocumentReference(refstring) + if err != nil { + t.Errorf("got non-nil error: %v", err) + } + if wantDocumentRefID != gotDocumentRefID { + t.Errorf("wanted document ref ID %s, got %s", wantDocumentRefID, gotDocumentRefID) + } + if wantURI != gotURI { + t.Errorf("wanted URI %s, got %s", wantURI, gotURI) + } + if wantAlg != gotAlg { + t.Errorf("wanted alg %s, got %s", wantAlg, gotAlg) + } + if wantChecksum != gotChecksum { + t.Errorf("wanted checksum %s, got %s", wantChecksum, gotChecksum) + } +} + +func TestFailsExternalDocumentReferenceWithInvalidFormats(t *testing.T) { + invalidRefs := []string{ + "whoops", + "DocumentRef-", + "DocumentRef- ", + "DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301", + "DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 d6a770ba38583ed4bb4525bd96e50461655d2759", + "DocumentRef-spdx-tool-1.2", + "spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1:d6a770ba38583ed4bb4525bd96e50461655d2759", + } + for _, refstring := range invalidRefs { + _, _, _, _, err := extractExternalDocumentReference(refstring) + if err == nil { + t.Errorf("expected non-nil error for %s, got nil", refstring) + } + } +} diff --git a/tvloader/parser2v2/parse_creation_info.go b/tvloader/parser2v2/parse_creation_info.go index 9c84404..e017c69 100644 --- a/tvloader/parser2v2/parse_creation_info.go +++ b/tvloader/parser2v2/parse_creation_info.go @@ -4,6 +4,7 @@ package parser2v2 import ( "fmt" + "strings" "github.com/spdx/tools-golang/spdx" ) @@ -16,7 +17,9 @@ func (parser *tvParser2_2) parsePairFromCreationInfo2_2(tag string, value string // create an SPDX Creation Info data struct if we don't have one already if parser.doc.CreationInfo == nil { - parser.doc.CreationInfo = &spdx.CreationInfo2_2{} + parser.doc.CreationInfo = &spdx.CreationInfo2_2{ + ExternalDocumentReferences: map[string]spdx.ExternalDocumentRef2_2{}, + } } ci := parser.doc.CreationInfo @@ -32,7 +35,17 @@ func (parser *tvParser2_2) parsePairFromCreationInfo2_2(tag string, value string case "DocumentNamespace": ci.DocumentNamespace = value case "ExternalDocumentRef": - ci.ExternalDocumentReferences = append(ci.ExternalDocumentReferences, value) + documentRefID, uri, alg, checksum, err := extractExternalDocumentReference(value) + if err != nil { + return err + } + edr := spdx.ExternalDocumentRef2_2{ + DocumentRefID: documentRefID, + URI: uri, + Alg: alg, + Checksum: checksum, + } + ci.ExternalDocumentReferences[documentRefID] = edr case "LicenseListVersion": ci.LicenseListVersion = value case "Creator": @@ -105,3 +118,57 @@ func (parser *tvParser2_2) parsePairFromCreationInfo2_2(tag string, value string return nil } + +// ===== Helper functions ===== + +func extractExternalDocumentReference(value string) (string, string, string, string, error) { + sp := strings.Split(value, " ") + // remove any that are just whitespace + keepSp := []string{} + for _, s := range sp { + ss := strings.TrimSpace(s) + if ss != "" { + keepSp = append(keepSp, ss) + } + } + + var documentRefID, uri, alg, checksum string + + // now, should have 4 items (or 3, if Alg and Checksum were joined) + // and should be able to map them + if len(keepSp) == 4 { + documentRefID = keepSp[0] + uri = keepSp[1] + alg = keepSp[2] + // check that colon is present for alg, and remove it + if !strings.HasSuffix(alg, ":") { + return "", "", "", "", fmt.Errorf("algorithm does not end with colon") + } + alg = strings.TrimSuffix(alg, ":") + checksum = keepSp[3] + } else if len(keepSp) == 3 { + documentRefID = keepSp[0] + uri = keepSp[1] + // split on colon into alg and checksum + parts := strings.SplitN(keepSp[2], ":", 2) + if len(parts) != 2 { + return "", "", "", "", fmt.Errorf("missing colon separator between algorithm and checksum") + } + alg = parts[0] + checksum = parts[1] + } else { + return "", "", "", "", fmt.Errorf("expected 4 elements, got %d", len(keepSp)) + } + + // additionally, we should be able to parse the first element as a + // DocumentRef- ID string, and we should remove that prefix + if !strings.HasPrefix(documentRefID, "DocumentRef-") { + return "", "", "", "", fmt.Errorf("expected first element to have DocumentRef- prefix") + } + documentRefID = strings.TrimPrefix(documentRefID, "DocumentRef-") + if documentRefID == "" { + return "", "", "", "", fmt.Errorf("document identifier has nothing after prefix") + } + + return documentRefID, uri, alg, checksum, nil +} diff --git a/tvloader/parser2v2/parse_creation_info_test.go b/tvloader/parser2v2/parse_creation_info_test.go index 0be6110..da29854 100644 --- a/tvloader/parser2v2/parse_creation_info_test.go +++ b/tvloader/parser2v2/parse_creation_info_test.go @@ -242,7 +242,19 @@ func TestParser2_2CanParseCreationInfoTags(t *testing.T) { // External Document Reference refs := []string{ "DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2759", - "DocumentRef-xyz-2.1.2 http://example.com/xyz-2.1.2 SHA1: d6a770ba38583ed4bb4525bd96e50461655d2760", + "DocumentRef-xyz-2.1.2 http://example.com/xyz-2.1.2 SHA1:d6a770ba38583ed4bb4525bd96e50461655d2760", + } + wantRef0 := spdx.ExternalDocumentRef2_2{ + DocumentRefID: "spdx-tool-1.2", + URI: "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301", + Alg: "SHA1", + Checksum: "d6a770ba38583ed4bb4525bd96e50461655d2759", + } + wantRef1 := spdx.ExternalDocumentRef2_2{ + DocumentRefID: "xyz-2.1.2", + URI: "http://example.com/xyz-2.1.2", + Alg: "SHA1", + Checksum: "d6a770ba38583ed4bb4525bd96e50461655d2760", } err = parser.parsePairFromCreationInfo2_2("ExternalDocumentRef", refs[0]) if err != nil { @@ -252,10 +264,22 @@ func TestParser2_2CanParseCreationInfoTags(t *testing.T) { if err != nil { t.Errorf("expected nil error, got %v", err) } - if len(parser.doc.CreationInfo.ExternalDocumentReferences) != 2 || - parser.doc.CreationInfo.ExternalDocumentReferences[0] != refs[0] || - parser.doc.CreationInfo.ExternalDocumentReferences[1] != refs[1] { - t.Errorf("got %v for ExternalDocumentReferences", parser.doc.CreationInfo.ExternalDocumentReferences) + if len(parser.doc.CreationInfo.ExternalDocumentReferences) != 2 { + t.Errorf("got %d ExternalDocumentReferences, expected %d", len(parser.doc.CreationInfo.ExternalDocumentReferences), 2) + } + gotRef0 := parser.doc.CreationInfo.ExternalDocumentReferences["spdx-tool-1.2"] + if gotRef0.DocumentRefID != wantRef0.DocumentRefID || + gotRef0.URI != wantRef0.URI || + gotRef0.Alg != wantRef0.Alg || + gotRef0.Checksum != wantRef0.Checksum { + t.Errorf("got %#v for ExternalDocumentReferences[0], wanted %#v", gotRef0, wantRef0) + } + gotRef1 := parser.doc.CreationInfo.ExternalDocumentReferences["xyz-2.1.2"] + if gotRef1.DocumentRefID != wantRef1.DocumentRefID || + gotRef1.URI != wantRef1.URI || + gotRef1.Alg != wantRef1.Alg || + gotRef1.Checksum != wantRef1.Checksum { + t.Errorf("got %#v for ExternalDocumentReferences[1], wanted %#v", gotRef1, wantRef1) } // License List Version @@ -429,3 +453,73 @@ func TestParser2_2CICreatesAnnotation(t *testing.T) { t.Errorf("pointer to new Annotation doesn't match idx 0 for doc.Annotations[]") } } + +// ===== Helper function tests ===== + +func TestCanExtractExternalDocumentReference(t *testing.T) { + refstring := "DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1:d6a770ba38583ed4bb4525bd96e50461655d2759" + wantDocumentRefID := "spdx-tool-1.2" + wantURI := "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301" + wantAlg := "SHA1" + wantChecksum := "d6a770ba38583ed4bb4525bd96e50461655d2759" + + gotDocumentRefID, gotURI, gotAlg, gotChecksum, err := extractExternalDocumentReference(refstring) + if err != nil { + t.Errorf("got non-nil error: %v", err) + } + if wantDocumentRefID != gotDocumentRefID { + t.Errorf("wanted document ref ID %s, got %s", wantDocumentRefID, gotDocumentRefID) + } + if wantURI != gotURI { + t.Errorf("wanted URI %s, got %s", wantURI, gotURI) + } + if wantAlg != gotAlg { + t.Errorf("wanted alg %s, got %s", wantAlg, gotAlg) + } + if wantChecksum != gotChecksum { + t.Errorf("wanted checksum %s, got %s", wantChecksum, gotChecksum) + } +} + +func TestCanExtractExternalDocumentReferenceWithExtraWhitespace(t *testing.T) { + refstring := " DocumentRef-spdx-tool-1.2 \t http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 \t SHA1: \t d6a770ba38583ed4bb4525bd96e50461655d2759" + wantDocumentRefID := "spdx-tool-1.2" + wantURI := "http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301" + wantAlg := "SHA1" + wantChecksum := "d6a770ba38583ed4bb4525bd96e50461655d2759" + + gotDocumentRefID, gotURI, gotAlg, gotChecksum, err := extractExternalDocumentReference(refstring) + if err != nil { + t.Errorf("got non-nil error: %v", err) + } + if wantDocumentRefID != gotDocumentRefID { + t.Errorf("wanted document ref ID %s, got %s", wantDocumentRefID, gotDocumentRefID) + } + if wantURI != gotURI { + t.Errorf("wanted URI %s, got %s", wantURI, gotURI) + } + if wantAlg != gotAlg { + t.Errorf("wanted alg %s, got %s", wantAlg, gotAlg) + } + if wantChecksum != gotChecksum { + t.Errorf("wanted checksum %s, got %s", wantChecksum, gotChecksum) + } +} + +func TestFailsExternalDocumentReferenceWithInvalidFormats(t *testing.T) { + invalidRefs := []string{ + "whoops", + "DocumentRef-", + "DocumentRef- ", + "DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301", + "DocumentRef-spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 d6a770ba38583ed4bb4525bd96e50461655d2759", + "DocumentRef-spdx-tool-1.2", + "spdx-tool-1.2 http://spdx.org/spdxdocs/spdx-tools-v1.2-3F2504E0-4F89-41D3-9A0C-0305E82C3301 SHA1:d6a770ba38583ed4bb4525bd96e50461655d2759", + } + for _, refstring := range invalidRefs { + _, _, _, _, err := extractExternalDocumentReference(refstring) + if err == nil { + t.Errorf("expected non-nil error for %s, got nil", refstring) + } + } +} -- cgit v1.2.3