From f6091982769ca08e7604a1be79afdb8902717114 Mon Sep 17 00:00:00 2001 From: RishabhBhatnagar Date: Wed, 19 Aug 2020 20:41:36 +0530 Subject: Add Support For RDFLoader Without License - The Licensing Info is Incomplete - Some other attributes are not set in the tools-golang data model. Signed-off-by: RishabhBhatnagar --- rdfloader/parser2v2/constants.go | 243 ++++++++++++++++++ rdfloader/parser2v2/parse_annotation.go | 77 ++++++ rdfloader/parser2v2/parse_creation_info.go | 54 ++++ rdfloader/parser2v2/parse_file.go | 162 ++++++++++++ rdfloader/parser2v2/parse_license.go | 138 +++++++++++ rdfloader/parser2v2/parse_other_license_info.go | 66 +++++ rdfloader/parser2v2/parse_package.go | 312 ++++++++++++++++++++++++ rdfloader/parser2v2/parse_relationship.go | 131 ++++++++++ rdfloader/parser2v2/parse_review.go | 33 +++ rdfloader/parser2v2/parse_snippet_info.go | 200 +++++++++++++++ rdfloader/parser2v2/parse_spdx_document.go | 107 ++++++++ rdfloader/parser2v2/parser.go | 309 +++++++++++++++++++++++ rdfloader/parser2v2/types.go | 26 ++ rdfloader/parser2v2/utils.go | 160 ++++++++++++ rdfloader/rdfloader.go | 20 ++ 15 files changed, 2038 insertions(+) create mode 100644 rdfloader/parser2v2/constants.go create mode 100644 rdfloader/parser2v2/parse_annotation.go create mode 100644 rdfloader/parser2v2/parse_creation_info.go create mode 100644 rdfloader/parser2v2/parse_file.go create mode 100644 rdfloader/parser2v2/parse_license.go create mode 100644 rdfloader/parser2v2/parse_other_license_info.go create mode 100644 rdfloader/parser2v2/parse_package.go create mode 100644 rdfloader/parser2v2/parse_relationship.go create mode 100644 rdfloader/parser2v2/parse_review.go create mode 100644 rdfloader/parser2v2/parse_snippet_info.go create mode 100644 rdfloader/parser2v2/parse_spdx_document.go create mode 100644 rdfloader/parser2v2/parser.go create mode 100644 rdfloader/parser2v2/types.go create mode 100644 rdfloader/parser2v2/utils.go create mode 100644 rdfloader/rdfloader.go (limited 
to 'rdfloader') diff --git a/rdfloader/parser2v2/constants.go b/rdfloader/parser2v2/constants.go new file mode 100644 index 0000000..fcab50d --- /dev/null +++ b/rdfloader/parser2v2/constants.go @@ -0,0 +1,243 @@ +package parser2v2 + +import "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + +var ( + // NAMESPACES + NS_SPDX = "http://spdx.org/rdf/terms#" + NS_RDFS = "http://www.w3.org/2000/01/rdf-schema#" + NS_RDF = parser.RDFNS + NS_PTR = "http://www.w3.org/2009/pointers#" + NS_DOAP = "http://usefulinc.com/ns/doap#" + + // SPDX properties + SPDX_SPEC_VERSION = NS_SPDX + "specVersion" + SPDX_DATA_LICENSE = NS_SPDX + "dataLicense" + SPDX_NAME = NS_SPDX + "name" + SPDX_EXTERNAL_DOCUMENT_REF = NS_SPDX + "externalDocumentRef" + SPDX_LICENSE_LIST_VERSION = NS_SPDX + "licenseListVersion" + SPDX_CREATOR = NS_SPDX + "creator" + SPDX_CREATED = NS_SPDX + "created" + SPDX_REVIEWED = NS_SPDX + "reviewed" + SPDX_DESCRIBES_PACKAGE = NS_SPDX + "describesPackage" + SPDX_HAS_EXTRACTED_LICENSING_INFO = NS_SPDX + "hasExtractedLicensingInfo" + SPDX_RELATIONSHIP = NS_SPDX + "relationship" + SPDX_ANNOTATION = NS_SPDX + "annotation" + SPDX_COMMENT = NS_SPDX + "comment" + SPDX_CREATION_INFO = NS_SPDX + "creationInfo" + SPDX_CHECKSUM_ALGORITHM_SHA1 = NS_SPDX + "checksumAlgorithm_sha1" + SPDX_CHECKSUM_ALGORITHM_SHA256 = NS_SPDX + "checksumAlgorithm_sha256" + SPDX_CHECKSUM_ALGORITHM_MD5 = NS_SPDX + "checksumAlgorithm_md5" + SPDX_EXTERNAL_DOCUMENT_ID = NS_SPDX + "externalDocumentId" + SPDX_SPDX_DOCUMENT = NS_SPDX + "spdxDocument" + SPDX_SPDX_DOCUMENT_CAPITALIZED = NS_SPDX + "SpdxDocument" + SPDX_CHECKSUM = NS_SPDX + "checksum" + SPDX_ANNOTATION_TYPE = NS_SPDX + "annotationType" + SPDX_ANNOTATION_TYPE_OTHER = NS_SPDX + "annotationType_other" + SPDX_ANNOTATION_TYPE_REVIEW = NS_SPDX + "annotationType_review" + SPDX_LICENSE_INFO_IN_FILE = NS_SPDX + "licenseInfoInFile" + SPDX_LICENSE_CONCLUDED = NS_SPDX + "licenseConcluded" + SPDX_LICENSE_COMMENTS = NS_SPDX + "licenseComments" + 
SPDX_COPYRIGHT_TEXT = NS_SPDX + "copyrightText" + SPDX_ARTIFACT_OF = NS_SPDX + "artifactOf" + SPDX_NOTICE_TEXT = NS_SPDX + "noticeText" + SPDX_FILE_CONTRIBUTOR = NS_SPDX + "fileContributor" + SPDX_FILE_DEPENDENCY = NS_SPDX + "fileDependency" + SPDX_FILE_TYPE = NS_SPDX + "fileType" + SPDX_FILE_NAME = NS_SPDX + "fileName" + SPDX_EXTRACTED_TEXT = NS_SPDX + "extractedText" + SPDX_LICENSE_ID = NS_SPDX + "licenseId" + SPDX_FILE = NS_SPDX + "File" + SPDX_PACKAGE = NS_SPDX + "Package" + SPDX_SPDX_ELEMENT = NS_SPDX + "SpdxElement" + SPDX_VERSION_INFO = NS_SPDX + "versionInfo" + SPDX_PACKAGE_FILE_NAME = NS_SPDX + "packageFileName" + SPDX_SUPPLIER = NS_SPDX + "supplier" + SPDX_ORIGINATOR = NS_SPDX + "originator" + SPDX_DOWNLOAD_LOCATION = NS_SPDX + "downloadLocation" + SPDX_NOASSERTION = NS_SPDX + "noassertion" + SPDX_NONE = NS_SPDX + "none" + SPDX_FILES_ANALYZED = NS_SPDX + "filesAnalyzed" + SPDX_PACKAGE_VERIFICATION_CODE = NS_SPDX + "packageVerificationCode" + SPDX_SOURCE_INFO = NS_SPDX + "sourceInfo" + SPDX_LICENSE_INFO_FROM_FILES = NS_SPDX + "licenseInfoFromFiles" + SPDX_LICENSE_DECLARED = NS_SPDX + "licenseDeclared" + SPDX_SUMMARY = NS_SPDX + "summary" + SPDX_DESCRIPTION = NS_SPDX + "description" + SPDX_EXTERNAL_REF = NS_SPDX + "externalRef" + SPDX_HAS_FILE = NS_SPDX + "hasFile" + SPDX_ATTRIBUTION_TEXT = NS_SPDX + "attributionText" + SPDX_PACKAGE_VERIFICATION_CODE_VALUE = NS_SPDX + "packageVerificationCodeValue" + SPDX_PACKAGE_VERIFICATION_CODE_EXCLUDED_FILE = NS_SPDX + "packageVerificationCodeExcludedFile" + SPDX_RELATED_SPDX_ELEMENT = NS_SPDX + "relatedSpdxElement" + SPDX_RELATIONSHIP_TYPE = NS_SPDX + "relationshipType" + SPDX_SNIPPET_FROM_FILE = NS_SPDX + "snippetFromFile" + SPDX_LICENSE_INFO_IN_SNIPPET = NS_SPDX + "licenseInfoInSnippet" + SPDX_RANGE = NS_SPDX + "range" + SPDX_REVIEWER = NS_SPDX + "reviewer" + SPDX_REVIEW_DATE = NS_SPDX + "reviewDate" + SPDX_SNIPPET = NS_SPDX + "Snippet" + SPDX_ALGORITHM = NS_SPDX + "algorithm" + SPDX_CHECKSUM_VALUE = NS_SPDX + 
"checksumValue" + SPDX_REFERENCE_CATEGORY = NS_SPDX + "referenceCategory" + SPDX_REFERENCE_CATEGORY_PACKAGE_MANAGER = NS_SPDX + "referenceCategory_packageManager" + SPDX_REFERENCE_CATEGORY_SECURITY = NS_SPDX + "referenceCategory_security" + SPDX_REFERENCE_CATEGORY_OTHER = NS_SPDX + "referenceCategory_other" + + SPDX_REFERENCE_TYPE = NS_SPDX + "referenceType" + SPDX_REFERENCE_LOCATOR = NS_SPDX + "referenceLocator" + SPDX_ANNOTATION_DATE = NS_SPDX + "annotationDate" + SPDX_ANNOTATOR = NS_SPDX + "annotator" + SPDX_MEMBER = NS_SPDX + "member" + SPDX_DISJUNCTIVE_LICENSE_SET = NS_SPDX + "DisjunctiveLicenseSet" + SPDX_CONJUNCTIVE_LICENSE_SET = NS_SPDX + "ConjunctiveLicenseSet" + SPDX_EXTRACTED_LICENSING_INFO = NS_SPDX + "ExtractedLicensingInfo" + + // RDFS properties + RDFS_COMMENT = NS_RDFS + "comment" + RDFS_SEE_ALSO = NS_RDFS + "seeAlso" + + // RDF properties + RDF_TYPE = NS_RDF + "type" + + // DOAP properties + DOAP_HOMEPAGE = NS_DOAP + "homepage" + DOAP_NAME = NS_DOAP + "name" + + // PTR properties + PTR_START_END_POINTER = NS_PTR + "StartEndPointer" + PTR_START_POINTER = NS_PTR + "startPointer" + PTR_BYTE_OFFSET_POINTER = NS_PTR + "ByteOffsetPointer" + PTR_LINE_CHAR_POINTER = NS_PTR + "LineCharPointer" + PTR_REFERENCE = NS_PTR + "reference" + PTR_OFFSET = NS_PTR + "offset" + PTR_LINE_NUMBER = NS_PTR + "lineNumber" + PTR_END_POINTER = NS_PTR + "endPointer" + + // prefixes + PREFIX_RELATIONSHIP_TYPE = "relationshipType_" +) + +func AllRelationshipTypes() []string { + return []string{ + "amendment", "ancestorOf", "buildDependencyOf", "buildToolOf", + "containedBy", "contains", "copyOf", "dataFile", "dataFileOf", + "dependencyManifestOf", "dependencyOf", "dependsOn", "descendantOf", + "describedBy", "describes", "devDependencyOf", "devToolOf", + "distributionArtifact", "documentation", "dynamicLink", "exampleOf", + "expandedFromArchive", "fileAdded", "fileDeleted", "fileModified", + "generatedFrom", "generates", "hasPrerequisite", "metafileOf", + "optionalComponentOf", 
"optionalDependencyOf", "other", "packageOf", + "patchApplied", "patchFor", "prerequisiteFor", "providedDependencyOf", + "runtimeDependencyOf", "staticLink", "testDependencyOf", "testOf", + "testToolOf", "testcaseOf", "variantOf", + } +} + +func AllStandardLicenseIDS() []string { + return []string{ + "0BSD", "389-exception", "AAL", "Abstyles", "Adobe-2006", "Adobe-Glyph", + "ADSL", "AFL-1.1", "AFL-1.2", "AFL-2.0", "AFL-2.1", "AFL-3.0", "Afmparse", + "AGPL-1.0-only", "AGPL-1.0-or-later", "AGPL-1.0", "AGPL-3.0-only", + "AGPL-3.0-or-later", "AGPL-3.0", "Aladdin", "AMDPLPA", "AML", "AMPAS", + "ANTLR-PD", "Apache-1.0", "Apache-1.1", "Apache-2.0", "APAFML", "APL-1.0", + "APSL-1.0", "APSL-1.1", "APSL-1.2", "APSL-2.0", "Artistic-1.0-cl8", + "Artistic-1.0-Perl", "Artistic-1.0", "Artistic-2.0", "", + "Autoconf-exception-2.0", "Autoconf-exception-3.0", "Bahyph", "Barr", + "Beerware", "Bison-exception-2.2", "BitTorrent-1.0", "BitTorrent-1.1", + "blessing", "BlueOak-1.0.0", "Bootloader-exception", "Borceux", "BSD-1-Clause", + "BSD-2-Clause-FreeBSD", "BSD-2-Clause-NetBSD", "BSD-2-Clause-Patent", + "BSD-2-Clause-Views", "BSD-2-Clause", "BSD-3-Clause-Attribution", + "BSD-3-Clause-Clear", "BSD-3-Clause-LBNL", + "BSD-3-Clause-No-Nuclear-License-2014", "BSD-3-Clause-No-Nuclear-License", + "BSD-3-Clause-No-Nuclear-Warranty", "BSD-3-Clause-Open-MPI", "BSD-3-Clause", + "BSD-4-Clause-UC", "BSD-4-Clause", "BSD-Protection", "BSD-Source-Code", + "BSL-1.0", "bzip2-1.0.5", "bzip2-1.0.6", "CAL-1.0-Combined-Work-Exception", + "CAL-1.0", "Caldera", "CATOSL-1.1", "CC-BY-1.0", "CC-BY-2.0", "CC-BY-2.5", + "CC-BY-3.0-AT", "CC-BY-3.0", "CC-BY-4.0", "CC-BY-NC-1.0", "CC-BY-NC-2.0", + "CC-BY-NC-2.5", "CC-BY-NC-3.0", "CC-BY-NC-4.0", "CC-BY-NC-ND-1.0", + "CC-BY-NC-ND-2.0", "CC-BY-NC-ND-2.5", "CC-BY-NC-ND-3.0-IGO", "CC-BY-NC-ND-3.0", + "CC-BY-NC-ND-4.0", "CC-BY-NC-SA-1.0", "CC-BY-NC-SA-2.0", "CC-BY-NC-SA-2.5", + "CC-BY-NC-SA-3.0", "CC-BY-NC-SA-4.0", "CC-BY-ND-1.0", "CC-BY-ND-2.0", + "CC-BY-ND-2.5", 
"CC-BY-ND-3.0", "CC-BY-ND-4.0", "CC-BY-SA-1.0", "CC-BY-SA-2.0", + "CC-BY-SA-2.5", "CC-BY-SA-3.0-AT", "CC-BY-SA-3.0", "CC-BY-SA-4.0", "CC-PDDC", + "CC0-1.0", "CDDL-1.0", "CDDL-1.1", "CDLA-Permissive-1.0", "CDLA-Sharing-1.0", + "CECILL-1.0", "CECILL-1.1", "CECILL-2.0", "CECILL-2.1", "CECILL-B", "CECILL-C", + "CERN-OHL-1.1", "CERN-OHL-1.2", "CERN-OHL-P-2.0", "CERN-OHL-S-2.0", + "CERN-OHL-W-2.0", "ClArtistic", "Classpath-exception-2.0", + "CLISP-exception-2.0", "CNRI-Jython", "CNRI-Python-GPL-Compatible", + "CNRI-Python", "Condor-1.1", "copyleft-next-0.3.0", "copyleft-next-0.3.1", + "CPAL-1.0", "CPL-1.0", "CPOL-1.02", "Crossword", "CrystalStacker", + "CUA-OPL-1.0", "Cube", "curl", "D-FSL-1.0", "diffmark", + "DigiRule-FOSS-exception", "DOC", "Dotseqn", "DSDP", "dvipdfm", "ECL-1.0", + "ECL-2.0", "eCos-2.0", "eCos-exception-2.0", "EFL-1.0", "EFL-2.0", "eGenix", + "Entessa", "EPICS", "EPL-1.0", "EPL-2.0", "ErlPL-1.1", "etalab-2.0", + "EUDatagrid", "EUPL-1.0", "EUPL-1.1", "EUPL-1.2", "Eurosym", "Fair", + "Fawkes-Runtime-exception", "FLTK-exception", "Font-exception-2.0", + "Frameworx-1.0", "FreeImage", "freertos-exception-2.0", "FSFAP", "FSFUL", + "FSFULLR", "FTL", "GCC-exception-2.0", "GCC-exception-3.1", + "GFDL-1.1-invariants-only", "GFDL-1.1-invariants-or-later", + "GFDL-1.1-no-invariants-only", "GFDL-1.1-no-invariants-or-later", + "GFDL-1.1-only", "GFDL-1.1-or-later", "GFDL-1.1", "GFDL-1.2-invariants-only", + "GFDL-1.2-invariants-or-later", "GFDL-1.2-no-invariants-only", + "GFDL-1.2-no-invariants-or-later", "GFDL-1.2-only", "GFDL-1.2-or-later", + "GFDL-1.2", "GFDL-1.3-invariants-only", "GFDL-1.3-invariants-or-later", + "GFDL-1.3-no-invariants-only", "GFDL-1.3-no-invariants-or-later", + "GFDL-1.3-only", "GFDL-1.3-or-later", "GFDL-1.3", "Giftware", "GL2PS", "Glide", + "Glulxe", "GLWTPL", "gnu-javamail-exception", "gnuplot", "GPL-1.0+", + "GPL-1.0-only", "GPL-1.0-or-later", "GPL-1.0", "GPL-2.0+", "GPL-2.0-only", + "GPL-2.0-or-later", "GPL-2.0-with-autoconf-exception", + 
"GPL-2.0-with-bison-exception", "GPL-2.0-with-classpath-exception", + "GPL-2.0-with-font-exception", "GPL-2.0-with-GCC-exception", "GPL-2.0", + "GPL-3.0+", "GPL-3.0-linking-exception", "GPL-3.0-linking-source-exception", + "GPL-3.0-only", "GPL-3.0-or-later", "GPL-3.0-with-autoconf-exception", + "GPL-3.0-with-GCC-exception", "GPL-3.0", "GPL-CC-1.0", "gSOAP-1.3b", + "HaskellReport", "Hippocratic-2.1", "HPND-sell-variant", "HPND", + "i2p-gpl-java-exception", "IBM-pibs", "ICU", "IJG", "ImageMagick", "iMatix", + "Imlib2", "Info-ZIP", "Intel-ACPI", "Intel", "Interbase-1.0", "IPA", "IPL-1.0", + "ISC", "JasPer-2.0", "JPNIC", "JSON", "LAL-1.2", "LAL-1.3", "Latex2e", + "Leptonica", "LGPL-2.0+", "LGPL-2.0-only", "LGPL-2.0-or-later", "LGPL-2.0", + "LGPL-2.1+", "LGPL-2.1-only", "LGPL-2.1-or-later", "LGPL-2.1", "LGPL-3.0+", + "LGPL-3.0-linking-exception", "LGPL-3.0-only", "LGPL-3.0-or-later", "LGPL-3.0", + "LGPLLR", "libpng-2.0", "Libpng", "libselinux-1.0", "libtiff", + "Libtool-exception", "licenses", "LiLiQ-P-1.1", "LiLiQ-R-1.1", + "LiLiQ-Rplus-1.1", "Linux-OpenIB", "Linux-syscall-note", "LLVM-exception", + "LPL-1.0", "LPL-1.02", "LPPL-1.0", "LPPL-1.1", "LPPL-1.2", "LPPL-1.3a", + "LPPL-1.3c", "LZMA-exception", "MakeIndex", "mif-exception", "MirOS", "MIT-0", + "MIT-advertising", "MIT-CMU", "MIT-enna", "MIT-feh", "MIT", "MITNFA", + "Motosoto", "mpich2", "MPL-1.0", "MPL-1.1", "MPL-2.0-no-copyleft-exception", + "MPL-2.0", "MS-PL", "MS-RL", "MTLL", "MulanPSL-1.0", "MulanPSL-2.0", "Multics", + "Mup", "NASA-1.3", "Naumen", "NBPL-1.0", "NCGL-UK-2.0", "NCSA", "Net-SNMP", + "NetCDF", "Newsletr", "NGPL", "NIST-PD-fallback", "NIST-PD", "NLOD-1.0", + "NLPL", "Nokia-Qt-exception-1.1", "Nokia", "NOSL", "Noweb", "NPL-1.0", + "NPL-1.1", "NPOSL-3.0", "NRL", "NTP-0", "NTP", "Nunit", "O-UDA-1.0", + "OCaml-LGPL-linking-exception", "OCCT-exception-1.0", "OCCT-PL", "OCLC-2.0", + "ODbL-1.0", "ODC-By-1.0", "OFL-1.0-no-RFN", "OFL-1.0-RFN", "OFL-1.0", + "OFL-1.1-no-RFN", "OFL-1.1-RFN", "OFL-1.1", 
"OGC-1.0", "OGL-Canada-2.0", + "OGL-UK-1.0", "OGL-UK-2.0", "OGL-UK-3.0", "OGTSL", "OLDAP-1.1", "OLDAP-1.2", + "OLDAP-1.3", "OLDAP-1.4", "OLDAP-2.0.1", "OLDAP-2.0", "OLDAP-2.1", + "OLDAP-2.2.1", "OLDAP-2.2.2", "OLDAP-2.2", "OLDAP-2.3", "OLDAP-2.4", + "OLDAP-2.5", "OLDAP-2.6", "OLDAP-2.7", "OLDAP-2.8", "OML", "", + "OpenJDK-assembly-exception-1.0", "OpenSSL", "openvpn-openssl-exception", + "OPL-1.0", "OSET-PL-2.1", "OSL-1.0", "OSL-1.1", "OSL-2.0", "OSL-2.1", + "OSL-3.0", "Parity-6.0.0", "Parity-7.0.0", "PDDL-1.0", "PHP-3.0", "PHP-3.01", + "Plexus", "PolyForm-Noncommercial-1.0.0", "PolyForm-Small-Business-1.0.0", + "PostgreSQL", "PS-or-PDF-font-exception-20170817", "PSF-2.0", "psfrag", + "psutils", "Python-2.0", "Qhull", "QPL-1.0", "Qt-GPL-exception-1.0", + "Qt-LGPL-exception-1.1", "Qwt-exception-1.0", "Rdisc", "RHeCos-1.1", "RPL-1.1", + "RPL-1.5", "RPSL-1.0", "RSA-MD", "RSCPL", "Ruby", "SAX-PD", "Saxpath", "SCEA", + "Sendmail-8.23", "Sendmail", "SGI-B-1.0", "SGI-B-1.1", "SGI-B-2.0", "SHL-0.5", + "SHL-0.51", "SHL-2.0", "SHL-2.1", "SimPL-2.0", "SISSL-1.2", "SISSL", + "Sleepycat", "SMLNJ", "SMPPL", "SNIA", "Spencer-86", "Spencer-94", + "Spencer-99", "SPL-1.0", "SSH-OpenSSH", "SSH-short", "SSPL-1.0", + "StandardML-NJ", "SugarCRM-1.1.3", "Swift-exception", "SWL", "TAPR-OHL-1.0", + "TCL", "TCP-wrappers", "TMate", "TORQUE-1.1", "TOSL", "TU-Berlin-1.0", + "TU-Berlin-2.0", "u-boot-exception-2.0", "UCL-1.0", "Unicode-DFS-2015", + "Unicode-DFS-2016", "Unicode-TOU", "Universal-FOSS-exception-1.0", "Unlicense", + "UPL-1.0", "Vim", "VOSTROM", "VSL-1.0", "W3C-19980720", "W3C-20150513", "W3C", + "Watcom-1.0", "Wsuipa", "WTFPL", "WxWindows-exception-3.1", "wxWindows", "X11", + "Xerox", "XFree86-1.1", "xinetd", "Xnet", "xpp", "XSkat", "YPL-1.0", "YPL-1.1", + "Zed", "Zend-2.0", "Zimbra-1.3", "Zimbra-1.4", "zlib-acknowledgement", "Zlib", + "ZPL-1.1", "ZPL-2.0", "ZPL-2.1", + } +} diff --git a/rdfloader/parser2v2/parse_annotation.go b/rdfloader/parser2v2/parse_annotation.go new file mode 
100644 index 0000000..01d762b --- /dev/null +++ b/rdfloader/parser2v2/parse_annotation.go @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +package parser2v2 + +import ( + "fmt" + gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + "github.com/spdx/tools-golang/spdx" +) + +// creates a new instance of annotation and sets the annotation attributes +// associated with the given node. +// The newly created annotation is appended to the doc. +func (parser *rdfParser2_2) parseAnnotationFromNode(node *gordfParser.Node) (err error) { + ann := &spdx.Annotation2_2{} + for _, subTriple := range parser.nodeToTriples[node.String()] { + switch subTriple.Predicate.ID { + case SPDX_ANNOTATOR: + // cardinality: exactly 1 + err = setAnnotatorFromString(subTriple.Object.ID, ann) + case SPDX_ANNOTATION_DATE: + // cardinality: exactly 1 + ann.AnnotationDate = subTriple.Object.ID + case RDFS_COMMENT: + // cardinality: exactly 1 + ann.AnnotationComment = subTriple.Object.ID + case SPDX_ANNOTATION_TYPE: + // cardinality: exactly 1 + err = setAnnotationType(subTriple.Object.ID, ann) + case RDF_TYPE: + // cardinality: exactly 1 + continue + default: + err = fmt.Errorf("unknown predicate %s while parsing annotation", subTriple.Predicate.ID) + } + if err != nil { + return err + } + } + return setAnnotationToParser(parser, ann) +} + +func setAnnotationToParser(parser *rdfParser2_2, annotation *spdx.Annotation2_2) error { + if parser.doc == nil { + return fmt.Errorf("uninitialized spdx document") + } + if parser.doc.Annotations == nil { + parser.doc.Annotations = []*spdx.Annotation2_2{} + } + parser.doc.Annotations = append(parser.doc.Annotations, annotation) + return nil +} + +func setAnnotatorFromString(annotatorString string, ann *spdx.Annotation2_2) error { + subkey, subvalue, err := ExtractSubs(annotatorString, ":") + if err != nil { + return err + } + if subkey == "Person" || subkey == "Organization" || subkey == "Tool" { + ann.AnnotatorType = 
subkey + ann.Annotator = subvalue + return nil + } + return fmt.Errorf("unrecognized Annotator type %v while parsing annotation", subkey) +} + +func setAnnotationType(annType string, ann *spdx.Annotation2_2) error { + switch annType { + case SPDX_ANNOTATION_TYPE_OTHER: + ann.AnnotationType = "OTHER" + case SPDX_ANNOTATION_TYPE_REVIEW: + ann.AnnotationType = "REVIEW" + default: + return fmt.Errorf("unknown annotation type %s", annType) + } + return nil +} diff --git a/rdfloader/parser2v2/parse_creation_info.go b/rdfloader/parser2v2/parse_creation_info.go new file mode 100644 index 0000000..78104bd --- /dev/null +++ b/rdfloader/parser2v2/parse_creation_info.go @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +package parser2v2 + +import ( + "fmt" + gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + "github.com/spdx/tools-golang/spdx" +) + +// Cardinality: Mandatory, one. +func (parser *rdfParser2_2) parseCreationInfoFromNode(ci *spdx.CreationInfo2_2, node *gordfParser.Node) error { + for _, triple := range parser.nodeToTriples[node.String()] { + switch triple.Predicate.ID { + case SPDX_LICENSE_LIST_VERSION: // 2.7 + // cardinality: max 1 + ci.LicenseListVersion = triple.Object.ID + case SPDX_CREATOR: // 2.8 + // cardinality: min 1 + err := setCreator(triple.Object.ID, ci) + if err != nil { + return err + } + case SPDX_CREATED: // 2.9 + // cardinality: exactly 1 + ci.Created = triple.Object.ID + case RDFS_COMMENT: // 2.10 + ci.CreatorComment = triple.Object.ID + case RDF_TYPE: + continue + default: + return fmt.Errorf("unknown predicate %v while parsing a creation info", triple.Predicate) + } + } + return nil +} + +func setCreator(creator string, ci *spdx.CreationInfo2_2) error { + entityType, entity, err := ExtractSubs(creator, ":") + if err != nil { + return fmt.Errorf("error setting creator of a creation info: %s", err) + } + switch entityType { + case "Person": + ci.CreatorPersons = append(ci.CreatorPersons, entity) + 
case "Organization": + ci.CreatorOrganizations = append(ci.CreatorOrganizations, entity) + case "Tool": + ci.CreatorTools = append(ci.CreatorTools, entity) + default: + return fmt.Errorf("unknown creatorType %v in a creation info", entityType) + } + return nil +} diff --git a/rdfloader/parser2v2/parse_file.go b/rdfloader/parser2v2/parse_file.go new file mode 100644 index 0000000..d99f1f1 --- /dev/null +++ b/rdfloader/parser2v2/parse_file.go @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +package parser2v2 + +import ( + "fmt" + gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + "github.com/spdx/tools-golang/spdx" + "strings" +) + +// returns a file instance and the error if any encountered. +func (parser *rdfParser2_2) getFileFromNode(fileNode *gordfParser.Node) (file *spdx.File2_2, err error) { + file = &spdx.File2_2{} + + err = setFileIdentifier(fileNode.ID, file, parser) // 4.2 + if err != nil { + return nil, err + } + + for _, subTriple := range parser.nodeToTriples[fileNode.String()] { + switch subTriple.Predicate.ID { + case SPDX_FILE_NAME: // 4.1 + // cardinality: exactly 1 + file.FileName = subTriple.Object.ID + case SPDX_NAME: + // cardinality: exactly 1 + // todo: check where it will be set in the golang-tools spdx-data-model + case RDF_TYPE: + // cardinality: exactly 1 + case SPDX_FILE_TYPE: // 4.3 + // cardinality: min 0 + fileType := "" + fileType, err = parser.getFileTypeFromUri(subTriple.Object.ID) + file.FileType = append(file.FileType, fileType) + case SPDX_CHECKSUM: // 4.4 + // cardinality: min 1 + err = parser.setFileChecksumFromNode(file, subTriple.Object) + case SPDX_LICENSE_CONCLUDED: // 4.5 + // cardinality: (exactly 1 anyLicenseInfo) or (None) or (Noassertion) + file.LicenseConcluded, err = parser.getLicenseFromTriple(subTriple) + case SPDX_LICENSE_INFO_IN_FILE: // 4.6 + // cardinality: min 1 + lastPart := getLastPartOfURI(subTriple.Object.ID) + file.LicenseInfoInFile = 
append(file.LicenseInfoInFile, lastPart) + case SPDX_LICENSE_COMMENTS: // 4.7 + // cardinality: max 1 + file.LicenseComments = subTriple.Object.ID + case SPDX_COPYRIGHT_TEXT: // 4.8 + // cardinality: exactly 1 + file.FileCopyrightText = subTriple.Object.ID + case SPDX_LICENSE_INFO_FROM_FILES: + // todo: implement it. It is not defined in the tools-golang model. + // deprecated artifactOf (see sections 4.9, 4.10, 4.11) + case SPDX_ARTIFACT_OF: + // cardinality: min 0 + var artifactOf *spdx.ArtifactOfProject2_2 + artifactOf, err = parser.getArtifactFromNode(subTriple.Object) + file.ArtifactOfProjects = append(file.ArtifactOfProjects, artifactOf) + case RDFS_COMMENT: // 4.12 + // cardinality: max 1 + file.FileComment = subTriple.Object.ID + case SPDX_NOTICE_TEXT: // 4.13 + // cardinality: max 1 + file.FileNotice = subTriple.Object.ID + case SPDX_FILE_CONTRIBUTOR: // 4.14 + // cardinality: min 0 + file.FileContributor = append(file.FileContributor, subTriple.Object.ID) + case SPDX_FILE_DEPENDENCY: + // cardinality: min 0 + file, err := parser.getFileFromNode(subTriple.Object) + if err != nil { + return nil, fmt.Errorf("error setting a file dependency in a file: %v", err) + } + parser.files[file.FileSPDXIdentifier] = file + case SPDX_ATTRIBUTION_TEXT: + // cardinality: min 0 + file.FileAttributionTexts = append(file.FileAttributionTexts, subTriple.Object.ID) + case SPDX_ANNOTATION: // unknown section + err = parser.parseAnnotationFromNode(subTriple.Object) + case SPDX_RELATIONSHIP: // unknown section + err = parser.parseRelationship(subTriple) + default: + return nil, fmt.Errorf("unknown triple predicate id %s", subTriple.Predicate.ID) + } + if err != nil { + return nil, err + } + } + return file, nil +} + +func (parser *rdfParser2_2) setFileChecksumFromNode(file *spdx.File2_2, checksumNode *gordfParser.Node) error { + checksumAlgorithm, checksumValue, err := parser.getChecksumFromNode(checksumNode) + if err != nil { + return nil + } + switch checksumAlgorithm { + case 
"MD5": + file.FileChecksumMD5 = checksumValue + case "SHA1": + file.FileChecksumSHA1 = checksumValue + case "SHA256": + file.FileChecksumSHA256 = checksumValue + case "": + return fmt.Errorf("empty checksum algorithm and value") + default: + return fmt.Errorf("unknown checksumAlgorithm %s while parsing a file", checksumAlgorithm) + } + return nil +} + +func (parser *rdfParser2_2) getArtifactFromNode(node *gordfParser.Node) (*spdx.ArtifactOfProject2_2, error) { + artifactOf := &spdx.ArtifactOfProject2_2{} + // setting artifactOfProjectURI attribute (which is optional) + if node.NodeType == gordfParser.IRI { + artifactOf.URI = node.ID + } + // parsing rest triples and attributes of the artifact. + for _, triple := range parser.nodeToTriples[node.String()] { + switch triple.Predicate.ID { + case RDF_TYPE: + case DOAP_HOMEPAGE: + artifactOf.HomePage = triple.Object.ID + case DOAP_NAME: + artifactOf.Name = triple.Object.ID + default: + return nil, fmt.Errorf("error parsing artifactOf predicate %s", triple.Predicate.ID) + } + } + return artifactOf, nil +} + +func (parser *rdfParser2_2) getFileTypeFromUri(uri string) (string, error) { + // fileType is given as a uri. for example: http://spdx.org/rdf/terms#fileType_text + lastPart := getLastPartOfURI(uri) + if !strings.HasPrefix(lastPart, "fileType_") { + return "", fmt.Errorf("fileType Uri must begin with fileTYpe_. found: %s", lastPart) + } + return strings.TrimPrefix(lastPart, "fileType_"), nil +} + +// populates parser.doc.UnpackagedFiles by a list of files which are not +// associated with a package by the hasFile attribute +// assumes: all the packages are already parsed. 
+func (parser *rdfParser2_2) setUnpackagedFiles() { + for fileID := range parser.files { + if !parser.assocWithPackage[fileID] { + parser.doc.UnpackagedFiles[fileID] = parser.files[fileID] + } + } +} + +func setFileIdentifier(idURI string, file *spdx.File2_2, parser *rdfParser2_2) (err error) { + idURI = strings.TrimSpace(idURI) + uriFragment := getLastPartOfURI(idURI) + file.FileSPDXIdentifier, err = ExtractElementID(uriFragment) + if err != nil { + return fmt.Errorf("error setting file identifier: %s", err) + } + return nil +} diff --git a/rdfloader/parser2v2/parse_license.go b/rdfloader/parser2v2/parse_license.go new file mode 100644 index 0000000..ffd3ccd --- /dev/null +++ b/rdfloader/parser2v2/parse_license.go @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +package parser2v2 + +import ( + "fmt" + gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + "github.com/RishabhBhatnagar/gordf/rdfwriter" + "strings" +) + +// either single tag or a compound license with member combination of single tags. +// todo: allow all types of licenses. +func (parser *rdfParser2_2) getLicenseFromTriple(triple *gordfParser.Triple) (licenseConcluded string, err error) { + licenseShortIdentifier := getLicenseStringFromURI(triple.Object.ID) + // return if license is None|Noassertion + if licenseShortIdentifier == "NONE" || licenseShortIdentifier == "NOASSERTION" { + return licenseShortIdentifier, nil + } + + // return if the license tag is not associated with any other triples. + if len(parser.nodeToTriples[triple.Object.String()]) == 0 { + return licenseShortIdentifier, nil + } + + // no need to parse standard licenses as they have constant fields. + // return if the license is among the standard licenses. 
+ for _, stdLicenseId := range AllStandardLicenseIDS() { + if stdLicenseId == licenseShortIdentifier { + return licenseShortIdentifier, nil + } + } + + // apart from the license being in the uri form, this function allows + // license to be a collection of licenses joined by a single operator + // (either conjunction or disjunction) + + typeTriples := rdfwriter.FilterTriples(parser.gordfParserObj.Triples, &triple.Object.ID, &RDF_TYPE, nil) + if len(typeTriples) == 0 { + return "", fmt.Errorf("node(%v) not associated with a type triple", triple.Object) + } + if len(typeTriples) > 1 { + return "", fmt.Errorf("node is associated with more than one type triple") + } + switch typeTriples[0].Object.ID { + case SPDX_DISJUNCTIVE_LICENSE_SET, SPDX_CONJUNCTIVE_LICENSE_SET: + + case SPDX_EXTRACTED_LICENSING_INFO: + err = parser.parseOtherLicenseFromNode(triple.Object) + if err != nil { + return "", err + } + othLic := parser.doc.OtherLicenses[len(parser.doc.OtherLicenses)-1] + return othLic.LicenseIdentifier, nil + default: + return "", fmt.Errorf("not implemented error: cannot parse %s", typeTriples[0].Object) + } + return parser.getLicenseFromLicenseSetNode(triple.Object) +} + +// Given the license URI, returns the name of the license defined +// in the last part of the uri. +// This function is susceptible to false-positives. +func getLicenseStringFromURI(uri string) string { + licenseEnd := strings.TrimSpace(getLastPartOfURI(uri)) + lower := strings.ToLower(licenseEnd) + if lower == "none" || lower == "noassertion" { + return strings.ToUpper(licenseEnd) + } + return licenseEnd +} + +// returns the checksum algorithm and it's value +// In the newer versions, these two strings will be bound to a single checksum struct +// whose pointer will be returned. 
+func (parser *rdfParser2_2) getChecksumFromNode(checksumNode *gordfParser.Node) (algorithm string, value string, err error) { + var checksumValue, checksumAlgorithm string + for _, checksumTriple := range parser.nodeToTriples[checksumNode.String()] { + switch checksumTriple.Predicate.ID { + case RDF_TYPE: + continue + case SPDX_CHECKSUM_VALUE: + // cardinality: exactly 1 + checksumValue = strings.TrimSpace(checksumTriple.Object.ID) + case SPDX_ALGORITHM: + // cardinality: exactly 1 + checksumAlgorithm, err = parser.getAlgorithmFromURI(checksumTriple.Object.ID) + if err != nil { + return + } + } + } + return checksumAlgorithm, checksumValue, nil +} + +func (parser *rdfParser2_2) getAlgorithmFromURI(algorithmURI string) (checksumAlgorithm string, err error) { + fragment := getLastPartOfURI(algorithmURI) + if !strings.HasPrefix(fragment, "checksumAlgorithm") { + return "", fmt.Errorf("checksum algorithm uri must begin with checksumAlgorithm. found %s", fragment) + } + algorithm := strings.TrimPrefix(fragment, "checksumAlgorithm_") + algorithm = strings.ToLower(strings.TrimSpace(algorithm)) + switch algorithm { + case "md2", "md4", "md5", "md6": + checksumAlgorithm = strings.ToUpper(algorithm) + case "sha1", "sha224", "sha256", "sha384", "sha512": + checksumAlgorithm = strings.ToUpper(algorithm) + default: + return "", fmt.Errorf("unknown checksum algorithm %s", algorithm) + } + return +} + +func (parser *rdfParser2_2) getLicenseFromLicenseSetNode(node *gordfParser.Node) (s string, err error) { + typeLicenseSet := "undefined" + var licenseSets []string + for _, lst := range parser.nodeToTriples[node.String()] { + switch lst.Predicate.ID { + case RDF_TYPE: + _, typeLicenseSet, err = ExtractSubs(lst.Object.ID, "#") + if err != nil { + return + } + case SPDX_MEMBER: + licenseSets = append(licenseSets, getLicenseStringFromURI(lst.Object.ID)) + default: + return "", fmt.Errorf("undefined predicate %s while parsing license set", lst.Predicate.ID) + } + } + switch 
typeLicenseSet { + case "DisjunctiveLicenseSet": + return strings.Join(licenseSets, " OR "), nil + case "ConjunctiveLicenseSet": + return strings.Join(licenseSets, " AND "), nil + default: + return "", fmt.Errorf("unknown licenseSet type %s", typeLicenseSet) + } +} diff --git a/rdfloader/parser2v2/parse_other_license_info.go b/rdfloader/parser2v2/parse_other_license_info.go new file mode 100644 index 0000000..dd1c193 --- /dev/null +++ b/rdfloader/parser2v2/parse_other_license_info.go @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +package parser2v2 + +import ( + "fmt" + gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + "github.com/spdx/tools-golang/spdx" + "strings" +) + +func (parser *rdfParser2_2) getExternalLicensingInfoFromNode(node *gordfParser.Node) (*spdx.OtherLicense2_2, error) { + lic := &spdx.OtherLicense2_2{} + licensePrefix := "LicenseRef-" + for _, triple := range parser.nodeToTriples[node.String()] { + switch triple.Predicate.ID { + case RDF_TYPE: + continue + case SPDX_LICENSE_ID: + fragment := strings.TrimSpace(getLastPartOfURI(triple.Subject.ID)) + if !strings.HasPrefix(fragment, licensePrefix) { + return nil, fmt.Errorf("license ID must be of type \"LicenseRef-[idstring]\"; found %s", fragment) + } + lic.LicenseIdentifier = strings.TrimSuffix(fragment, licensePrefix) + case SPDX_EXTRACTED_TEXT: + lic.ExtractedText = triple.Object.ID + case SPDX_NAME: + lic.LicenseName = triple.Object.ID + case RDFS_SEE_ALSO: + lic.LicenseCrossReferences = append(lic.LicenseCrossReferences, triple.Object.ID) + case RDFS_COMMENT: + lic.LicenseComment = triple.Object.ID + default: + return nil, fmt.Errorf("unknown predicate %v while parsing extractedLicensingInfo", triple.Predicate) + } + } + return lic, nil +} + +// parses the other license and appends it to the doc if no error is encountered. 
+func (parser *rdfParser2_2) parseOtherLicenseFromNode(node *gordfParser.Node) error { + ol := &spdx.OtherLicense2_2{} + ol.LicenseIdentifier = getLicenseStringFromURI(node.ID) // 6.1 + for _, triple := range parser.nodeToTriples[node.String()] { + switch triple.Predicate.ID { + case RDF_TYPE: + continue + case SPDX_EXTRACTED_TEXT: // 6.2 + ol.ExtractedText = triple.Object.ID + case SPDX_NAME: // 6.3 + ol.LicenseName = triple.Object.ID + case RDFS_SEE_ALSO: // 6.4 + ol.LicenseCrossReferences = append(ol.LicenseCrossReferences, triple.Object.ID) + case RDFS_COMMENT: // 6.5 + ol.LicenseComment = triple.Object.ID + case SPDX_LICENSE_ID: + // override licenseId from the rdf:about tag. + ol.LicenseIdentifier = getLicenseStringFromURI(triple.Object.ID) + default: + return fmt.Errorf("unknown predicate (%s) while parsing other license", triple.Predicate.ID) + } + } + + parser.doc.OtherLicenses = append(parser.doc.OtherLicenses, ol) + return nil +} diff --git a/rdfloader/parser2v2/parse_package.go b/rdfloader/parser2v2/parse_package.go new file mode 100644 index 0000000..484804f --- /dev/null +++ b/rdfloader/parser2v2/parse_package.go @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +package parser2v2 + +import ( + "fmt" + gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + "github.com/spdx/tools-golang/spdx" + "strings" +) + +func (parser *rdfParser2_2) getPackageFromNode(packageNode *gordfParser.Node) (pkg *spdx.Package2_2, err error) { + pkg = &spdx.Package2_2{} // new package which will be returned + + // setting the SPDXIdentifier for the package. + eId, err := ExtractElementID(getLastPartOfURI(packageNode.ID)) + if err != nil { + return nil, fmt.Errorf("error extracting elementID of a package identifier: %v", err) + } + pkg.PackageSPDXIdentifier = eId // 3.2 + + // iterate over all the triples associated with the provided package packageNode. 
+ for _, subTriple := range parser.nodeToTriples[packageNode.String()] { + switch subTriple.Predicate.ID { + case RDF_TYPE: + // cardinality: exactly 1 + continue + case SPDX_NAME: // 3.1 + // cardinality: exactly 1 + pkg.PackageName = subTriple.Object.ID + case SPDX_VERSION_INFO: // 3.3 + // cardinality: max 1 + pkg.PackageVersion = subTriple.Object.ID + case SPDX_PACKAGE_FILE_NAME: // 3.4 + // cardinality: max 1 + pkg.PackageFileName = subTriple.Object.ID + case SPDX_SUPPLIER: // 3.5 + // cardinality: max 1 + err = setPackageSupplier(pkg, subTriple.Object.ID) + if err != nil { + return nil, err + } + case SPDX_ORIGINATOR: // 3.6 + // cardinality: max 1 + err = setPackageOriginator(pkg, subTriple.Object.ID) + if err != nil { + return nil, err + } + case SPDX_DOWNLOAD_LOCATION: // 3.7 + // cardinality: exactly 1 + err = setDocumentLocationFromURI(subTriple.Object.ID, pkg) + if err != nil { + return nil, err + } + case SPDX_FILES_ANALYZED: // 3.8 + // cardinality: max 1 + err = setFilesAnalyzed(subTriple.Object.ID, pkg) + if err != nil { + err = fmt.Errorf("error setting a filesAnalyzed attribute of a package: %v", err) + } + case SPDX_PACKAGE_VERIFICATION_CODE: // 3.9 + // cardinality: max 1 + err = parser.setPackageVerificationCode(pkg, subTriple.Object) + case SPDX_CHECKSUM: // 3.10 + // cardinality: min 0 + err = setPackageChecksum(parser, pkg, subTriple.Object) + case DOAP_HOMEPAGE: // 3.11 + // cardinality: max 1 + // homepage must be a valid Uri + if !isUriValid(subTriple.Object.ID) { + return nil, fmt.Errorf("invalid uri %s while parsing doap_homepage in a package", subTriple.Object.ID) + } + pkg.PackageHomePage = subTriple.Object.ID + case SPDX_SOURCE_INFO: // 3.12 + // cardinality: max 1 + pkg.PackageSourceInfo = subTriple.Object.ID + case SPDX_LICENSE_CONCLUDED: // 3.13 + // cardinality: exactly 1 + licenseConcluded, err := parser.getLicenseFromTriple(subTriple) + if err != nil { + return nil, err + } + pkg.PackageLicenseConcluded = licenseConcluded + 
case SPDX_LICENSE_INFO_FROM_FILES: // 3.14 + // cardinality: min 0 + pkg.PackageLicenseInfoFromFiles = append(pkg.PackageLicenseInfoFromFiles, getLicenseStringFromURI(subTriple.Object.ID)) + case SPDX_LICENSE_DECLARED: // 3.15 + // cardinality: exactly 1 + license, err := parser.getLicenseFromTriple(subTriple) + if err != nil { + return nil, err + } + pkg.PackageLicenseDeclared = license + case SPDX_LICENSE_COMMENTS: // 3.16 + // cardinality: max 1 + pkg.PackageLicenseComments = subTriple.Object.ID + case SPDX_COPYRIGHT_TEXT: // 3.17 + // cardinality: exactly 1 + pkg.PackageCopyrightText = subTriple.Object.ID + case SPDX_SUMMARY: // 3.18 + // cardinality: max 1 + pkg.PackageSummary = subTriple.Object.ID + case SPDX_DESCRIPTION: // 3.19 + // cardinality: max 1 + pkg.PackageDescription = subTriple.Object.ID + case RDFS_COMMENT: // 3.20 + // cardinality: max 1 + pkg.PackageComment = subTriple.Object.ID + case SPDX_EXTERNAL_REF: // 3.21 + // cardinality: min 0 + externalDocRef, err := parser.getPackageExternalRef(subTriple) + if err != nil { + return nil, err + } + pkg.PackageExternalReferences = append(pkg.PackageExternalReferences, externalDocRef) + case SPDX_HAS_FILE: // 3.22 + // cardinality: min 0 + file, err := parser.getFileFromNode(subTriple.Object) + if err != nil { + return nil, err + } + parser.setFileToPackage(pkg, file) + case SPDX_RELATIONSHIP: + // cardinality: min 0 + err := parser.parseRelationship(subTriple) + if err != nil { + return nil, err + } + case SPDX_ATTRIBUTION_TEXT: + // cardinality: min 0 + pkg.PackageAttributionTexts = append(pkg.PackageAttributionTexts, subTriple.Object.ID) + case SPDX_ANNOTATION: + // cardinality: min 0 + err := parser.parseAnnotationFromNode(subTriple.Object) + if err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("unknown predicate id %s while parsing a package", subTriple.Predicate.ID) + } + if err != nil { + return nil, err + } + } + + return pkg, nil +} + +// parses externalReference found in 
the package by the associated triple. +func (parser *rdfParser2_2) getPackageExternalRef(triple *gordfParser.Triple) (externalDocRef *spdx.PackageExternalReference2_2, err error) { + externalDocRef = &spdx.PackageExternalReference2_2{} + for _, subTriple := range parser.nodeToTriples[triple.Object.String()] { + switch subTriple.Predicate.ID { + case SPDX_REFERENCE_CATEGORY: + // cardinality: exactly 1 + switch subTriple.Object.ID { + case SPDX_REFERENCE_CATEGORY_SECURITY: + externalDocRef.Category = "SECURITY" + case SPDX_REFERENCE_CATEGORY_PACKAGE_MANAGER: + externalDocRef.Category = "PACKAGE-MANAGER" + case SPDX_REFERENCE_CATEGORY_OTHER: + externalDocRef.Category = "OTHER" + default: + return nil, fmt.Errorf("unknown packageManager predicate uri %s", subTriple.Predicate.ID) + } + case RDF_TYPE: + continue + case SPDX_REFERENCE_TYPE: + // assumes: the reference type is associated with just the uri and + // other associated fields are ignored. + // other fields include: + // 1. contextualExample, + // 2. documentation and, + // 3. 
externalReferenceSite + externalDocRef.RefType = subTriple.Object.ID + case SPDX_REFERENCE_LOCATOR: + // cardinality: exactly 1 + externalDocRef.Locator = subTriple.Object.ID + case RDFS_COMMENT: + // cardinality: max 1 + externalDocRef.ExternalRefComment = subTriple.Object.ID + default: + return nil, fmt.Errorf("unknown package external reference predicate id %s", subTriple.Predicate.ID) + } + } + return +} + +func (parser *rdfParser2_2) setPackageVerificationCode(pkg *spdx.Package2_2, node *gordfParser.Node) error { + for _, subTriple := range parser.nodeToTriples[node.String()] { + switch subTriple.Predicate.ID { + case SPDX_PACKAGE_VERIFICATION_CODE_VALUE: + // cardinality: exactly 1 + pkg.PackageVerificationCode = subTriple.Object.ID + case SPDX_PACKAGE_VERIFICATION_CODE_EXCLUDED_FILE: + // cardinality: min 0 + pkg.PackageVerificationCodeExcludedFile = subTriple.Object.ID + case RDF_TYPE: + // cardinality: exactly 1 + continue + default: + return fmt.Errorf("unparsed predicate %s", subTriple.Predicate.ID) + } + } + return nil +} + +func (parser *rdfParser2_2) setFileToPackage(pkg *spdx.Package2_2, file *spdx.File2_2) { + if pkg.Files == nil { + pkg.Files = map[spdx.ElementID]*spdx.File2_2{} + } + pkg.Files[file.FileSPDXIdentifier] = file + parser.assocWithPackage[file.FileSPDXIdentifier] = true +} + +// given a supplierObject, sets the PackageSupplier attribute of the pkg. +// Args: +// value: [NOASSERTION | [Person | Organization]: string] +func setPackageSupplier(pkg *spdx.Package2_2, value string) error { + value = strings.TrimSpace(value) + if strings.ToUpper(value) == "NOASSERTION" { + pkg.PackageSupplierNOASSERTION = true + return nil + } + subKey, subValue, err := ExtractSubs(value, ":") + if err != nil { + return fmt.Errorf("package supplier must be of the form NOASSERTION or [Person|Organization]: string. 
found: %s", value) + } + switch subKey { + case "Person": + pkg.PackageSupplierPerson = subValue + case "Organization": + pkg.PackageSupplierOrganization = subValue + default: + return fmt.Errorf("unknown supplier %s", subKey) + } + return nil +} + +// given a OriginatorObject, sets the PackageOriginator attribute of the pkg. +// Args: +// value: [NOASSERTION | [Person | Organization]: string] +func setPackageOriginator(pkg *spdx.Package2_2, value string) error { + value = strings.TrimSpace(value) + if strings.ToUpper(value) == "NOASSERTION" { + pkg.PackageOriginatorNOASSERTION = true + return nil + } + subKey, subValue, err := ExtractSubs(value, ":") + if err != nil { + return fmt.Errorf("package originator must be of the form NOASSERTION or [Person|Organization]: string. found: %s", value) + } + + switch subKey { + case "Person": + pkg.PackageOriginatorPerson = subValue + case "Organization": + pkg.PackageOriginatorOrganization = subValue + default: + return fmt.Errorf("originator can be either a Person or Organization. found %s", subKey) + } + return nil +} + +// validates the uri and sets the location if it is valid +func setDocumentLocationFromURI(locationURI string, pkg *spdx.Package2_2) error { + switch locationURI { + case SPDX_NOASSERTION: + pkg.PackageDownloadLocation = "NOASSERTION" + case SPDX_NONE: + pkg.PackageDownloadLocation = "NONE" + default: + if !isUriValid(locationURI) { + return fmt.Errorf("%s is not a valid uri", locationURI) + } + pkg.PackageDownloadLocation = locationURI + } + return nil +} + +// sets the FilesAnalyzed attribute to the given package +// boolValue is a string of type "true" or "false" +func setFilesAnalyzed(boolValue string, pkg *spdx.Package2_2) error { + pkg.IsFilesAnalyzedTagPresent = true + switch strings.TrimSpace(boolValue) { + case "true": + pkg.FilesAnalyzed = true + case "false": + pkg.FilesAnalyzed = false + default: + return fmt.Errorf("filesAnalyzed can be either true/false. 
found %s", boolValue) + } + return nil +} + +func setPackageChecksum(parser *rdfParser2_2, pkg *spdx.Package2_2, node *gordfParser.Node) error { + checksumAlgorithm, checksumValue, err := parser.getChecksumFromNode(node) + if err != nil { + return err + } + switch checksumAlgorithm { + case "MD5": + pkg.PackageChecksumMD5 = checksumValue + case "SHA1": + pkg.PackageChecksumSHA1 = checksumValue + case "SHA256": + pkg.PackageChecksumSHA256 = checksumValue + default: + return fmt.Errorf("unknown checksumAlgorithm %s while parsing a package", checksumAlgorithm) + } + return nil +} diff --git a/rdfloader/parser2v2/parse_relationship.go b/rdfloader/parser2v2/parse_relationship.go new file mode 100644 index 0000000..d021595 --- /dev/null +++ b/rdfloader/parser2v2/parse_relationship.go @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +package parser2v2 + +import ( + "fmt" + gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + "github.com/RishabhBhatnagar/gordf/rdfwriter" + "github.com/spdx/tools-golang/spdx" + "strings" +) + +// parsing the relationship that exists in the rdf document. +// Relationship is of type RefA relationType RefB. +// parsing the relationship appends the relationship to the current document's +// Relationships Slice. 
+func (parser *rdfParser2_2) parseRelationship(triple *gordfParser.Triple) (err error) { + reln := spdx.Relationship2_2{} + + reln.RefA, err = getReferenceFromURI(triple.Subject.ID) + if err != nil { + return err + } + + for _, subTriple := range parser.nodeToTriples[triple.Object.String()] { + switch subTriple.Predicate.ID { + case SPDX_RELATIONSHIP_TYPE: + // cardinality: exactly 1 + reln.Relationship, err = getRelationshipType(subTriple.Object.ID) + case RDF_TYPE: + // cardinality: exactly 1 + continue + case SPDX_RELATED_SPDX_ELEMENT: + // cardinality: exactly 1 + // assumes: spdx-element is a uri + reln.RefB, err = getReferenceFromURI(subTriple.Object.ID) + if err != nil { + return err + } + + relatedSpdxElementTriples := parser.nodeToTriples[subTriple.Object.String()] + if len(relatedSpdxElementTriples) == 0 { + continue + } + + typeTriples := rdfwriter.FilterTriples(relatedSpdxElementTriples, &subTriple.Object.ID, &RDF_TYPE, nil) + if len(typeTriples) != 1 { + return fmt.Errorf("expected %s to have exactly one rdf:type triple. found %d triples", subTriple.Object, len(typeTriples)) + } + err = parser.parseRelatedElementFromTriple(reln, typeTriples[0]) + if err != nil { + return err + } + case RDFS_COMMENT: + // cardinality: max 1 + reln.RelationshipComment = subTriple.Object.ID + default: + return fmt.Errorf("unexpected predicate id: %s", subTriple.Predicate.ID) + } + } + parser.doc.Relationships = append(parser.doc.Relationships, &reln) + return nil +} + +func (parser *rdfParser2_2) parseRelatedElementFromTriple(reln spdx.Relationship2_2, triple *gordfParser.Triple) error { + // iterate over relatedElement Type and check which SpdxElement it is. 
+ var err error + switch triple.Object.ID { + case SPDX_FILE: + file, err := parser.getFileFromNode(triple.Subject) + if err != nil { + return fmt.Errorf("error setting a package: %v", err) + } + reln.RefB, err = ExtractDocElementID(getLastPartOfURI(triple.Subject.ID)) + if err != nil { + return err + } + parser.files[file.FileSPDXIdentifier] = file + + case SPDX_PACKAGE: + pkg, err := parser.getPackageFromNode(triple.Subject) + if err != nil { + return fmt.Errorf("error setting a package inside a relationship: %v", err) + } + reln.RefB, err = ExtractDocElementID(getLastPartOfURI(triple.Subject.ID)) + if err != nil { + return err + } + + parser.packages[pkg.PackageSPDXIdentifier] = pkg + + case SPDX_SPDX_ELEMENT: + // it shouldn't be associated with any other triple. + // it must be a uri reference. + reln.RefB, err = ExtractDocElementID(getLastPartOfURI(triple.Subject.ID)) + if err != nil { + return err + } + default: + return fmt.Errorf("undefined relatedElement %s found while parsing relationship", triple.Object.ID) + } + return nil +} + +func getReferenceFromURI(uri string) (spdx.DocElementID, error) { + fragment := getLastPartOfURI(uri) + switch strings.ToLower(strings.TrimSpace(fragment)) { + case "noassertion", "none": + return spdx.DocElementID{ + DocumentRefID: "", + ElementRefID: spdx.ElementID(strings.ToUpper(fragment)), + }, nil + } + return ExtractDocElementID(fragment) +} + +func getRelationshipType(relnType string) (string, error) { + relnType = strings.TrimSpace(relnType) + if !strings.HasPrefix(relnType, PREFIX_RELATIONSHIP_TYPE) { + return "", fmt.Errorf("relationshipType must start with %s. 
found %s", PREFIX_RELATIONSHIP_TYPE, relnType) + } + relnType = strings.TrimPrefix(relnType, PREFIX_RELATIONSHIP_TYPE) + + relnType = strings.TrimSpace(relnType) + for _, validRelationshipType := range AllRelationshipTypes() { + if relnType == validRelationshipType { + return relnType, nil + } + } + return "", fmt.Errorf("unknown relationshipType: %s", relnType) +} diff --git a/rdfloader/parser2v2/parse_review.go b/rdfloader/parser2v2/parse_review.go new file mode 100644 index 0000000..08e805d --- /dev/null +++ b/rdfloader/parser2v2/parse_review.go @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +package parser2v2 + +import ( + "fmt" + gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + "github.com/spdx/tools-golang/spdx" +) + +func (parser *rdfParser2_2) setReviewFromNode(reviewedNode *gordfParser.Node) error { + review := spdx.Review2_2{} + for _, triple := range parser.nodeToTriples[reviewedNode.String()] { + switch triple.Predicate.ID { + case RDF_TYPE: + // cardinality: exactly 1 + continue + case RDFS_COMMENT: + // cardinality: max 1 + review.ReviewComment = triple.Object.ID + case SPDX_REVIEW_DATE: + // cardinality: exactly 1 + review.ReviewDate = triple.Object.ID + case SPDX_REVIEWER: + // cardinality: max 1 + review.Reviewer = triple.Object.ID + default: + return fmt.Errorf("unknown predicate %v for review triples", triple.Predicate) + } + } + parser.doc.Reviews = append(parser.doc.Reviews, &review) + return nil +} diff --git a/rdfloader/parser2v2/parse_snippet_info.go b/rdfloader/parser2v2/parse_snippet_info.go new file mode 100644 index 0000000..7bd5624 --- /dev/null +++ b/rdfloader/parser2v2/parse_snippet_info.go @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +package parser2v2 + +import ( + "fmt" + gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + "github.com/spdx/tools-golang/spdx" + "strconv" + "strings" +) + +// Snippet Information +// Cardinality: 
Optional, Many +func (parser *rdfParser2_2) getSnippetInformationFromTriple2_2(triple *gordfParser.Triple) (si *spdx.Snippet2_2, err error) { + si = &spdx.Snippet2_2{} + + err = setSnippetID(triple.Subject.ID, si) + if err != nil { + return nil, err + } + + for _, siTriple := range parser.nodeToTriples[triple.Subject.String()] { + switch siTriple.Predicate.ID { + case RDF_TYPE: + // cardinality: exactly 1 + case SPDX_SNIPPET_FROM_FILE: + // cardinality: exactly 1 + // file which is associated with the snippet + file, err := parser.getFileFromNode(siTriple.Object) + if err != nil { + return nil, err + } + si.SnippetFromFileSPDXIdentifier, err = ExtractDocElementID(getLastPartOfURI(siTriple.Object.ID)) + parser.files[file.FileSPDXIdentifier] = file + case SPDX_NAME: + si.SnippetName = siTriple.Object.ID + case SPDX_COPYRIGHT_TEXT: + si.SnippetCopyrightText = siTriple.Object.ID + case SPDX_LICENSE_COMMENTS: + si.SnippetLicenseComments = siTriple.Object.ID + case SPDX_LICENSE_INFO_IN_SNIPPET: + si.LicenseInfoInSnippet = append(si.LicenseInfoInSnippet, siTriple.Object.ID) + case RDFS_COMMENT: + si.SnippetComment = siTriple.Object.ID + case SPDX_LICENSE_CONCLUDED: + si.SnippetLicenseConcluded = siTriple.Object.ID + case SPDX_RANGE: + // cardinality: min 1 + err = parser.setSnippetRangeFromNode(siTriple.Object, si) + if err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("unknown predicate %v", siTriple.Predicate.ID) + } + } + return si, nil +} + +// given is the id of the file, sets the snippet to the file in parser. 
+func (parser *rdfParser2_2) setSnippetToFileWithID(snippet *spdx.Snippet2_2, fileID spdx.ElementID) error { + if parser.files[fileID] == nil { + return fmt.Errorf("snippet refers to an undefined file with ID: %s", fileID) + } + + // initializing snippet of the files if it is not defined already + if parser.files[fileID].Snippets == nil { + parser.files[fileID].Snippets = map[spdx.ElementID]*spdx.Snippet2_2{} + } + + // setting the snippet to the file. + parser.files[fileID].Snippets[snippet.SnippetSPDXIdentifier] = snippet + + return nil +} + +func (parser *rdfParser2_2) setSnippetRangeFromNode(node *gordfParser.Node, si *spdx.Snippet2_2) error { + // todo: apply DRY in this method. + rangeType := 0 // 0: undefined range, 1: byte, 2: line + var start, end string + for _, t := range parser.nodeToTriples[node.String()] { + switch t.Predicate.ID { + case RDF_TYPE: + if t.Object.ID != PTR_START_END_POINTER { + return fmt.Errorf("expected range to have sub tag of type StartEndPointer, found %v", t.Object.ID) + } + case PTR_START_POINTER: + for _, subTriple := range parser.nodeToTriples[t.Object.String()] { + switch subTriple.Predicate.ID { + case RDF_TYPE: + switch subTriple.Object.ID { + case PTR_BYTE_OFFSET_POINTER: + if rangeType == 2 { + return fmt.Errorf("byte offset pointer merged with line offset pointer") + } + rangeType = 1 + case PTR_LINE_CHAR_POINTER: + if rangeType == 1 { + return fmt.Errorf("byte offset pointer merged with line offset pointer") + } + rangeType = 2 + default: + return fmt.Errorf("illegal pointer type %v", subTriple.Object.ID) + } + case PTR_REFERENCE: + err := parser.parseRangeReference(subTriple.Object, si) + if err != nil { + return nil + } + case PTR_OFFSET, PTR_LINE_NUMBER: + start = subTriple.Object.ID + default: + return fmt.Errorf("undefined predicate %v while parsing range", subTriple.Predicate.ID) + } + } + case PTR_END_POINTER: + subTriples := parser.nodeToTriples[t.Object.String()] + for _, subTriple := range subTriples { + 
switch subTriple.Predicate.ID { + case RDF_TYPE: + switch subTriple.Object.ID { + case PTR_BYTE_OFFSET_POINTER: + if rangeType == 2 { + return fmt.Errorf("byte offset pointer merged with line offset pointer") + } + rangeType = 1 + case PTR_LINE_CHAR_POINTER: + if rangeType == 1 { + return fmt.Errorf("byte offset pointer merged with line offset pointer") + } + rangeType = 2 + default: + return fmt.Errorf("illegal pointer type %v", subTriple.Object.ID) + } + case PTR_REFERENCE: + err := parser.parseRangeReference(subTriple.Object, si) + if err != nil { + return nil + } + case PTR_OFFSET, PTR_LINE_NUMBER: + end = subTriple.Object.ID + } + } + default: + return fmt.Errorf("unknown predicate %v", t.Predicate.ID) + } + } + if rangeType != 1 && rangeType != 2 { + return fmt.Errorf("undefined range type") + } + startNumber, err := strconv.Atoi(strings.TrimSpace(start)) + if err != nil { + return fmt.Errorf("invalid number for range start: %v", start) + } + endNumber, err := strconv.Atoi(strings.TrimSpace(end)) + if err != nil { + return fmt.Errorf("invalid number for range end: %v", end) + } + if rangeType == 1 { + // byte range + si.SnippetByteRangeStart = startNumber + si.SnippetByteRangeEnd = endNumber + } else { + // line range + si.SnippetLineRangeStart = startNumber + si.SnippetLineRangeEnd = endNumber + } + return nil +} + +func (parser *rdfParser2_2) parseRangeReference(node *gordfParser.Node, snippet *spdx.Snippet2_2) error { + // reference is supposed to be either a resource reference to a file or a new file + // Unfortunately, I didn't find field where this can be set in the tools-golang data model. + // todo: set this reference to the snippet + switch node.NodeType { + case gordfParser.RESOURCELITERAL, gordfParser.LITERAL, gordfParser.BLANK: + return nil + } + file, err := parser.getFileFromNode(node) + if err != nil { + return fmt.Errorf("error parsing a new file in a reference") + } + + // a new file found within the pointer reference is an unpackaged file. 
+ if parser.doc.UnpackagedFiles == nil { + parser.doc.UnpackagedFiles = map[spdx.ElementID]*spdx.File2_2{} + } + parser.doc.UnpackagedFiles[file.FileSPDXIdentifier] = file + return nil +} + +func setSnippetID(uri string, si *spdx.Snippet2_2) (err error) { + fragment := getLastPartOfURI(uri) + si.SnippetSPDXIdentifier, err = ExtractElementID(fragment) + if err != nil { + return fmt.Errorf("error setting snippet identifier: %v", uri) + } + return nil +} diff --git a/rdfloader/parser2v2/parse_spdx_document.go b/rdfloader/parser2v2/parse_spdx_document.go new file mode 100644 index 0000000..7769e18 --- /dev/null +++ b/rdfloader/parser2v2/parse_spdx_document.go @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +package parser2v2 + +import ( + "fmt" + gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + "github.com/spdx/tools-golang/spdx" +) + +func (parser *rdfParser2_2) parseSpdxDocumentNode(spdxDocNode *gordfParser.Node) (err error) { + // create a new creation info + ci := parser.doc.CreationInfo + + // parse the document header information (SPDXID and document namespace) + // the Subject.ID is of type baseURI#spdxID + baseUri, offset, err := ExtractSubs(spdxDocNode.ID, "#") + if err != nil { + return err + } + ci.DocumentNamespace = baseUri // 2.5 + ci.SPDXIdentifier = spdx.ElementID(offset) // 2.3 + + // parse other associated triples. + for _, subTriple := range parser.nodeToTriples[spdxDocNode.String()] { + objectValue := subTriple.Object.ID + switch subTriple.Predicate.ID { + case SPDX_SPEC_VERSION: // 2.1: specVersion + // cardinality: exactly 1 + ci.SPDXVersion = objectValue + case SPDX_DATA_LICENSE: // 2.2: dataLicense + // cardinality: exactly 1 + ci.DataLicense, err = parser.getLicenseFromTriple(subTriple) // todo: sort it out. 
+ case SPDX_NAME: // 2.4: DocumentName + // cardinality: exactly 1 + ci.DocumentName = objectValue + case SPDX_EXTERNAL_DOCUMENT_REF: // 2.6: externalDocumentReferences + // cardinality: min 0 + var extRef string + extRef, err = parser.getExternalDocumentRefFromTriples(parser.nodeToTriples[subTriple.Object.String()]) + ci.ExternalDocumentReferences = append(ci.ExternalDocumentReferences, extRef) + case SPDX_CREATION_INFO: // 2.7 - 2.10: + // cardinality: exactly 1 + err = parser.parseCreationInfoFromNode(ci, subTriple.Object) + case SPDX_COMMENT: // 2.11: Document Comment + // cardinality: max 1 + ci.DocumentComment = objectValue + case SPDX_REVIEWED: // reviewed: + // cardinality: min 0 + err = parser.setReviewFromNode(subTriple.Object) + case SPDX_DESCRIBES_PACKAGE: // describes Package + // cardinality: min 0 + var pkg *spdx.Package2_2 + pkg, err = parser.getPackageFromNode(subTriple.Object) + if err != nil { + return err + } + parser.doc.Packages[pkg.PackageSPDXIdentifier] = pkg + case SPDX_HAS_EXTRACTED_LICENSING_INFO: // hasExtractedLicensingInfo + // cardinality: min 0 + err = parser.parseOtherLicenseFromNode(subTriple.Object) // todo: sort this out. + case SPDX_RELATIONSHIP: // relationship + // cardinality: min 0 + err = parser.parseRelationship(subTriple) + case SPDX_ANNOTATION: // annotations + // cardinality: min 0 + err = parser.parseAnnotationFromNode(subTriple.Object) + } + if err != nil { + return err + } + } + + // control reaches here iff no error is encountered + // set the ci if no error is encountered while parsing triples. 
+ parser.doc.CreationInfo = ci + return nil +} + +func (parser *rdfParser2_2) getExternalDocumentRefFromTriples(triples []*gordfParser.Triple) (string, error) { + var docID, checksumValue, checksumAlgorithm, spdxDocument string + var err error + for _, triple := range triples { + switch triple.Predicate.ID { + case SPDX_EXTERNAL_DOCUMENT_ID: + // cardinality: exactly 1 + docID = triple.Object.ID + case SPDX_SPDX_DOCUMENT: + // cardinality: exactly 1 + // assumption: "spdxDocument" property of an external document + // reference is just a uri which doesn't follow a spdxDocument definition + spdxDocument = triple.Object.ID + case SPDX_CHECKSUM: + // cardinality: exactly 1 + checksumAlgorithm, checksumValue, err = parser.getChecksumFromNode(triple.Object) + if err != nil { + return "", err + } + case RDF_TYPE: + continue + default: + return "", fmt.Errorf("unknown predicate ID (%s) while parsing externalDocumentReference", triple.Predicate.ID) + } + } + // transform the variables into string form (same as that of tag-value). 
+ return fmt.Sprintf("%s %s %s: %s", docID, spdxDocument, checksumAlgorithm, checksumValue), nil +} diff --git a/rdfloader/parser2v2/parser.go b/rdfloader/parser2v2/parser.go new file mode 100644 index 0000000..3145688 --- /dev/null +++ b/rdfloader/parser2v2/parser.go @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +package parser2v2 + +import ( + "errors" + "fmt" + gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser" + gordfWriter "github.com/RishabhBhatnagar/gordf/rdfwriter" + "github.com/spdx/tools-golang/spdx" +) + +// returns a new instance of rdfParser2_2 given the gordf object and nodeToTriples mapping +func NewParser2_2(gordfParserObj *gordfParser.Parser, nodeToTriples map[string][]*gordfParser.Triple) *rdfParser2_2 { + parser := rdfParser2_2{ + gordfParserObj: gordfParserObj, + nodeToTriples: nodeToTriples, + doc: &spdx.Document2_2{ + CreationInfo: &spdx.CreationInfo2_2{}, + Packages: map[spdx.ElementID]*spdx.Package2_2{}, + UnpackagedFiles: map[spdx.ElementID]*spdx.File2_2{}, + OtherLicenses: []*spdx.OtherLicense2_2{}, + Relationships: []*spdx.Relationship2_2{}, + Annotations: []*spdx.Annotation2_2{}, + Reviews: []*spdx.Review2_2{}, + }, + files: map[spdx.ElementID]*spdx.File2_2{}, + packages: map[spdx.ElementID]*spdx.Package2_2{}, + assocWithPackage: map[spdx.ElementID]bool{}, + } + return &parser +} + +// main function which takes in a gordfParser and returns +// a spdxDocument model or the error encountered while parsing it +func LoadFromGoRDFParser(gordfParserObj *gordfParser.Parser) (*spdx.Document2_2, error) { + // nodeToTriples is a mapping from a node to list of triples. + // for every node in the set of subjects of all the triples, + // it provides a list of triples that are associated with that subject node. 
+ nodeToTriples := gordfWriter.GetNodeToTriples(gordfParserObj.Triples) + parser := NewParser2_2(gordfParserObj, nodeToTriples) + + spdxDocumentNode, err := parser.getSpdxDocNode() + if err != nil { + return nil, err + } + + err = parser.parseSpdxDocumentNode(spdxDocumentNode) + if err != nil { + return nil, err + } + + // parsing other root elements + for _, rootNode := range gordfWriter.GetRootNodes(parser.gordfParserObj.Triples) { + typeTriples := gordfWriter.FilterTriples(gordfParserObj.Triples, &rootNode.ID, &RDF_TYPE, nil) + if len(typeTriples) != 1 { + return nil, fmt.Errorf("every node must be associated with exactly 1 type Triple. found %d type triples", len(typeTriples)) + } + switch typeTriples[0].Object.ID { + case SPDX_SPDX_DOCUMENT_CAPITALIZED: + continue // it is already parsed. + case SPDX_SNIPPET: + snippet, err := parser.getSnippetInformationFromTriple2_2(typeTriples[0]) + if err != nil { + return nil, err + } + err = parser.setSnippetToFileWithID(snippet, snippet.SnippetFromFileSPDXIdentifier.ElementRefID) + if err != nil { + return nil, err + } + // todo: check other root node attributes. + default: + continue + // because in rdf it is quite possible that the root node is an + // element that has been used in the some other element as a child + } + } + + // parsing packages and files sets the files to a files variable which is + // associated with the parser and not the document. following method is + // necessary to transfer the files which are not set in the packages to the + // UnpackagedFiles attribute of the document + // WARNING: do not relocate following function call. It must be at the end of the function + parser.setUnpackagedFiles() + return parser.doc, nil +} + +// from the given parser object, returns the SpdxDocument Node defined in the root elements. +// returns error if the document is associated with no SpdxDocument or +// associated with more than one SpdxDocument node. 
+func (parser *rdfParser2_2) getSpdxDocNode() (node *gordfParser.Node, err error) {
+	// Design note: the search is restricted to root nodes on purpose. A
+	// relatedElement (or any other attribute) may depend on another
+	// SpdxDocument and reference it with a SpdxDocument tag, so filtering every
+	// triple with rdf:type=spdx:SpdxDocument would produce false positives.
+	var docNode *gordfParser.Node
+	roots := gordfWriter.GetRootNodes(parser.gordfParserObj.Triples)
+	for _, root := range roots {
+		// every root node is expected to declare exactly one rdf:type.
+		ownTriples := parser.nodeToTriples[root.String()]
+		typeTriples := gordfWriter.FilterTriples(ownTriples, &root.ID, &RDF_TYPE, nil)
+		if count := len(typeTriples); count != 1 {
+			return nil, fmt.Errorf("rootNode (%v) must be associated with exactly one"+
+				" triple of predicate rdf:type, found %d triples", root, count)
+		}
+
+		typeTriple := typeTriples[0]
+		if typeTriple.Object.ID != SPDX_SPDX_DOCUMENT_CAPITALIZED {
+			continue // not a SpdxDocument node
+		}
+		// a second, different SpdxDocument root makes the input ambiguous.
+		if docNode != nil && docNode.ID != typeTriple.Subject.ID {
+			return nil, fmt.Errorf("found more than one SpdxDocument Node (%v and %v)", docNode, typeTriple.Subject)
+		}
+		docNode = typeTriple.Subject
+	}
+
+	if docNode != nil {
+		return docNode, nil
+	}
+	return nil, fmt.Errorf("RDF files must be associated with a SpdxDocument tag. No tag found")
+}
+
+// unused
+// setFiles parses the subject of every triple whose predicate matches RDF_TYPE
+// and whose object matches SPDX_FILE, and stores each resulting file in
+// parser.files keyed by its SPDX identifier. The first failure aborts the scan.
+func (parser *rdfParser2_2) setFiles() error {
+	predicatePattern, objectPattern := RDF_TYPE+"$", SPDX_FILE+"$"
+	fileTypeTriples, err := parser.filterTriplesByRegex(parser.gordfParserObj.Triples, ".*", predicatePattern, objectPattern)
+	if err != nil {
+		return err
+	}
+	for idx := range fileTypeTriples {
+		parsedFile, parseErr := parser.getFileFromNode(fileTypeTriples[idx].Subject)
+		if parseErr != nil {
+			return fmt.Errorf("error setting a file: %v", parseErr)
+		}
+		parser.files[parsedFile.FileSPDXIdentifier] = parsedFile
+	}
+	return nil
+}
+
+// unused
+// setPackages parses the subject of every triple whose predicate matches
+// RDF_TYPE and whose object matches SPDX_PACKAGE, and stores each resulting
+// package in parser.packages keyed by its SPDX identifier. The first failure
+// aborts the scan.
+func (parser *rdfParser2_2) setPackages() error {
+	predicatePattern, objectPattern := RDF_TYPE+"$", SPDX_PACKAGE+"$"
+	pkgTypeTriples, err := parser.filterTriplesByRegex(parser.gordfParserObj.Triples, ".*", predicatePattern, objectPattern)
+	if err != nil {
+		return err
+	}
+	for idx := range pkgTypeTriples {
+		parsedPkg, parseErr := parser.getPackageFromNode(pkgTypeTriples[idx].Subject)
+		if parseErr != nil {
+			return fmt.Errorf("error setting a package: %v", parseErr)
+		}
+		parser.packages[parsedPkg.PackageSPDXIdentifier] = parsedPkg
+	}
+	return nil
+}
+
+// unused
+// assumes that the document's namespace is already set.
+func (parser *rdfParser2_2) setSnippetToDoc(si *spdx.Snippet2_2, snippetNode *gordfParser.Node) (err error) {
+	// NOTE: despite its name, this function only validates that the snippet
+	// node lives in the namespace of the document being parsed; si itself is
+	// not attached to anything here.
+	if parser.doc == nil || parser.doc.CreationInfo == nil {
+		return errors.New("document namespace not set yet")
+	}
+	docNS := parser.doc.CreationInfo.DocumentNamespace
+	snippetNS := stripLastPartOfUri(snippetNode.ID)
+	if !isUriSame(docNS, snippetNS) {
+		// found a snippet which doesn't belong to current document being set
+		return fmt.Errorf("document namespace(%s) and snippet namespace(%s) doesn't match", docNS, snippetNS)
+	}
+
+	return nil
+}
+
+// unused
+// setAnnotations hands the object of every (spdxDocNode, SPDX_ANNOTATION, *)
+// triple to parseAnnotationFromNode and stops at the first error.
+func (parser *rdfParser2_2) setAnnotations(spdxDocNode *gordfParser.Node) error {
+	triples := gordfWriter.FilterTriples(parser.gordfParserObj.Triples, &spdxDocNode.ID, &SPDX_ANNOTATION, nil)
+	for _, triple := range triples {
+		err := parser.parseAnnotationFromNode(triple.Object)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// unused
+// getSpecVersion returns the object ID of the single SPDX_SPEC_VERSION triple
+// of spdxDocNode. It returns an error when the node has no such triple or more
+// than one.
+func (parser *rdfParser2_2) getSpecVersion(spdxDocNode *gordfParser.Node) (string, error) {
+	specVersionTriples := gordfWriter.FilterTriples(parser.gordfParserObj.Triples, &spdxDocNode.ID, &SPDX_SPEC_VERSION, nil)
+	n := len(specVersionTriples)
+	if n == 0 {
+		return "", fmt.Errorf("no specVersion found for the given spdxNode")
+	}
+	if n > 1 {
+		return "", fmt.Errorf("there must be exactly one specVersion. found %d specVersion", n)
+	}
+	return specVersionTriples[0].Object.ID, nil
+}
+
+// unused
+// getDataLicense resolves the single SPDX_DATA_LICENSE triple of spdxDocNode
+// through getLicenseFromTriple. It returns an error when the node has no such
+// triple or more than one.
+func (parser *rdfParser2_2) getDataLicense(spdxDocNode *gordfParser.Node) (string, error) {
+	dataLicenseTriples := gordfWriter.FilterTriples(parser.gordfParserObj.Triples, &spdxDocNode.ID, &SPDX_DATA_LICENSE, nil)
+	n := len(dataLicenseTriples)
+	if n == 0 {
+		return "", fmt.Errorf("no dataLicense found for the given spdxNode")
+	}
+	if n > 1 {
+		return "", fmt.Errorf("there must be exactly one dataLicense. found %d dataLicense", n)
+	}
+	return parser.getLicenseFromTriple(dataLicenseTriples[0])
+}
+
+// unused
+// getDocumentName returns the object ID of the single SPDX_NAME triple of
+// spdxDocNode. It returns an error when the node has no such triple or more
+// than one.
+func (parser *rdfParser2_2) getDocumentName(spdxDocNode *gordfParser.Node) (string, error) {
+	triples := gordfWriter.FilterTriples(parser.gordfParserObj.Triples, &spdxDocNode.ID, &SPDX_NAME, nil)
+	n := len(triples)
+	if n == 0 {
+		return "", fmt.Errorf("no documentName found for the given spdxNode")
+	}
+	if n > 1 {
+		return "", fmt.Errorf("there must be exactly one documentName. found %d documentName", n)
+	}
+	return triples[0].Object.ID, nil
+}
+
+// unused
+// setCreationInfo locates the single SPDX_CREATION_INFO triple whose subject
+// starts with the document namespace derived from spdxDocNode, parses its
+// object into ci and stores ci in parser.doc.CreationInfo. It returns an error
+// when zero or more than one such triple exists, or when parsing fails.
+//
+// NOTE(review): parser.doc is dereferenced without a nil check, so it is
+// assumed to be initialised by the caller.
+// NOTE(review): docNS is interpolated into a regular expression unescaped, so
+// characters such as '.' in the namespace act as wildcards; consider
+// regexp.QuoteMeta if stricter matching is wanted.
+func (parser *rdfParser2_2) setCreationInfo(spdxDocNode *gordfParser.Node, ci *spdx.CreationInfo2_2) error {
+	docNS := stripJoiningChars(stripLastPartOfUri(spdxDocNode.ID))
+	allCreationInfoTriples, err := parser.filterTriplesByRegex(parser.gordfParserObj.Triples, docNS+".*", SPDX_CREATION_INFO, ".*")
+	if err != nil {
+		return err
+	}
+	n := len(allCreationInfoTriples)
+	if n > 1 {
+		return fmt.Errorf("document(%s) must have exactly one creation info. found %d", docNS, n)
+	}
+	if n == 0 {
+		return fmt.Errorf("no creation info found for the document identified by %s", docNS)
+	}
+	err = parser.parseCreationInfoFromNode(ci, allCreationInfoTriples[0].Object)
+	if err != nil {
+		return err
+	}
+	parser.doc.CreationInfo = ci
+	return nil
+}
+
+// unused
+// getDocumentComment returns the comment attached to spdxDocNode, or an empty
+// string when the node carries none. More than one matching triple is an error.
+//
+// The previous version indexed triples[0] in the n == 0 branch, which panics
+// on an empty slice, and returned "" when exactly one triple was present.
+//
+// NOTE(review): the lookup filters on SPDX_NAME, exactly like getDocumentName.
+// This looks like a copy-paste slip; presumably the predicate constant for the
+// document comment was intended. Confirm against constants.go.
+func (parser *rdfParser2_2) getDocumentComment(spdxDocNode *gordfParser.Node) (string, error) {
+	triples := gordfWriter.FilterTriples(parser.gordfParserObj.Triples, &spdxDocNode.ID, &SPDX_NAME, nil)
+	n := len(triples)
+	if n > 1 {
+		return "", fmt.Errorf("there must be atmost one documentComment. found %d documentComment", n)
+	}
+	if n == 0 {
+		// the comment is optional: no triple simply means no comment.
+		return "", nil
+	}
+	return triples[0].Object.ID, nil
+}
+
+// unused
+// setReviewed hands the object of every (spdxDocNode, SPDX_REVIEWED, *) triple
+// to setReviewFromNode and stops at the first error.
+func (parser *rdfParser2_2) setReviewed(spdxDocNode *gordfParser.Node) error {
+	triples := gordfWriter.FilterTriples(parser.gordfParserObj.Triples, &spdxDocNode.ID, &SPDX_REVIEWED, nil)
+	for _, triple := range triples {
+		err := parser.setReviewFromNode(triple.Object)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// unused
+// setDescribesPackage parses the object of every
+// (spdxDocNode, SPDX_DESCRIBES_PACKAGE, *) triple as a package and stores it in
+// parser.doc.Packages keyed by its SPDX identifier.
+//
+// NOTE(review): assumes parser.doc and parser.doc.Packages are already
+// initialised; nothing here allocates them.
+func (parser *rdfParser2_2) setDescribesPackage(spdxDocNode *gordfParser.Node) error {
+	triples := gordfWriter.FilterTriples(parser.gordfParserObj.Triples, &spdxDocNode.ID, &SPDX_DESCRIBES_PACKAGE, nil)
+	for _, triple := range triples {
+		pkg, err := parser.getPackageFromNode(triple.Object)
+		if err != nil {
+			return err
+		}
+		parser.doc.Packages[pkg.PackageSPDXIdentifier] = pkg
+	}
+	return nil
+}
+
+// unused
+// setExtractedLicensingInfo hands the object of every
+// (spdxDocNode, SPDX_HAS_EXTRACTED_LICENSING_INFO, *) triple to
+// parseOtherLicenseFromNode and stops at the first error.
+func (parser *rdfParser2_2) setExtractedLicensingInfo(spdxDocNode *gordfParser.Node) error {
+	triples := gordfWriter.FilterTriples(parser.gordfParserObj.Triples, &spdxDocNode.ID, &SPDX_HAS_EXTRACTED_LICENSING_INFO, nil)
+	for _, triple := range triples {
+		err := parser.parseOtherLicenseFromNode(triple.Object)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// unused
+// setRelationships hands every (spdxDocNode, SPDX_RELATIONSHIP, *) triple to
+// parseRelationship and stops at the first error.
+func (parser *rdfParser2_2) setRelationships(spdxDocNode *gordfParser.Node) error {
+	triples := gordfWriter.FilterTriples(parser.gordfParserObj.Triples, &spdxDocNode.ID, &SPDX_RELATIONSHIP, nil)
+	for _, triple := range triples {
+		err := parser.parseRelationship(triple)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/rdfloader/parser2v2/types.go b/rdfloader/parser2v2/types.go
new file mode 100644
index 0000000..05ac65a
--- /dev/null
+++ b/rdfloader/parser2v2/types.go
@@ -0,0 +1,26 @@
+// copied from tvloader/parser2v2/types.go
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+package parser2v2
+
+import (
+	gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser"
+	"github.com/spdx/tools-golang/spdx"
+)
+
+// rdfParser2_2 carries the gordf parse result together with the SPDX 2.2
+// document being built from it and the intermediate lookup tables.
+type rdfParser2_2 struct {
+	// fields associated with gordf project which
+	// will be required by rdfloader
+	gordfParserObj *gordfParser.Parser
+	nodeToTriples  map[string][]*gordfParser.Triple
+
+	// document into which data is being parsed
+	doc *spdx.Document2_2
+
+	// map of packages and files.
+	files            map[spdx.ElementID]*spdx.File2_2
+	assocWithPackage map[spdx.ElementID]bool
+	packages         map[spdx.ElementID]*spdx.Package2_2
+
+	// mapping of nodeStrings to parsed object to save double computation.
+	cache map[string]interface{}
+}
diff --git a/rdfloader/parser2v2/utils.go b/rdfloader/parser2v2/utils.go
new file mode 100644
index 0000000..e0c3557
--- /dev/null
+++ b/rdfloader/parser2v2/utils.go
@@ -0,0 +1,160 @@
+package parser2v2
+
+import (
+	"fmt"
+	gordfParser "github.com/RishabhBhatnagar/gordf/rdfloader/parser"
+	urilib "github.com/RishabhBhatnagar/gordf/uri"
+	"github.com/spdx/tools-golang/spdx"
+	"regexp"
+	"strings"
+)
+
+// getLastPartOfURI returns the text after the last '#' when uri contains one,
+// otherwise the text after the last '/'. A uri without either separator is
+// returned unchanged.
+func getLastPartOfURI(uri string) string {
+	if strings.Contains(uri, "#") {
+		parts := strings.Split(uri, "#")
+		return parts[len(parts)-1]
+	}
+	parts := strings.Split(uri, "/")
+	return parts[len(parts)-1]
+}
+
+// stripLastPartOfUri removes the part reported by getLastPartOfURI from the end
+// of uri. The joining '#' or '/' is left in place.
+func stripLastPartOfUri(uri string) string {
+	lastPart := getLastPartOfURI(uri)
+	uri = strings.TrimSuffix(uri, lastPart)
+	return uri
+}
+
+// stripJoiningChars removes one trailing "/" and then one trailing "#" from uri.
+func stripJoiningChars(uri string) string {
+	return strings.TrimSuffix(strings.TrimSuffix(uri, "/"), "#")
+}
+
+// isUriSame reports whether the two uris are equal once their trailing joining
+// characters have been stripped with stripJoiningChars.
+func isUriSame(uri1, uri2 string) bool {
+	return stripJoiningChars(uri1) == stripJoiningChars(uri2)
+}
+
+// filterAllTriplesByString returns the triples of the parsed graph whose
+// subject, predicate and object IDs are exactly equal to the given strings.
+func (parser *rdfParser2_2) filterAllTriplesByString(subject, predicate, object string) (retTriples []*gordfParser.Triple) {
+	for _, triple := range parser.gordfParserObj.Triples {
+		if triple.Subject.ID == subject && triple.Predicate.ID == predicate && triple.Object.ID == object {
+			retTriples = append(retTriples, triple)
+		}
+	}
+	return retTriples
+}
+
+// filterTriplesByRegex returns the triples whose subject, predicate and object
+// IDs match the respective regular expressions. The patterns are compiled on
+// every call and are matched with MatchString, so they are unanchored unless
+// the caller anchors them. A pattern that fails to compile is reported through
+// err and no triples are returned.
+func (parser *rdfParser2_2) filterTriplesByRegex(triples []*gordfParser.Triple, subject, predicate, object string) (retTriples []*gordfParser.Triple, err error) {
+	var subjectCompiled, objectCompiled, predicateCompiled *regexp.Regexp
+	subjectCompiled, err = regexp.Compile(subject)
+	if err != nil {
+		return
+	}
+	predicateCompiled, err = regexp.Compile(predicate)
+	if err != nil {
+		return
+	}
+	objectCompiled, err = regexp.Compile(object)
+	if err != nil {
+		return
+	}
+	for _, triple := range triples {
+		if subjectCompiled.MatchString(triple.Subject.ID) && predicateCompiled.MatchString(triple.Predicate.ID) && objectCompiled.MatchString(triple.Object.ID) {
+			retTriples = append(retTriples, triple)
+		}
+	}
+	return
+}
+
+// isUriValid reports whether urilib.NewURIRef accepts uri without an error.
+func isUriValid(uri string) bool {
+	_, err := urilib.NewURIRef(uri)
+	return err == nil
+}
+
+
+// Functions below this line are taken from tvloader/parser2v2/utils.go
+
+// ExtractDocElementID is used to extract DocumentRef and SPDXRef values from an
+// SPDX Identifier which can point either to this document or to a different
+// one. Accepted forms are "SPDXRef-<id>" and "DocumentRef-<doc>:SPDXRef-<id>";
+// anything else yields an error and a zero spdx.DocElementID.
+func ExtractDocElementID(value string) (spdx.DocElementID, error) {
+	docRefID := ""
+	idStr := value
+
+	// check prefix to see if it's a DocumentRef ID
+	if strings.HasPrefix(idStr, "DocumentRef-") {
+		// extract the part that comes between "DocumentRef-" and ":"
+		strs := strings.Split(idStr, ":")
+		// should be exactly two, part before and part after
+		if len(strs) < 2 {
+			return spdx.DocElementID{}, fmt.Errorf("no colon found although DocumentRef- prefix present")
+		}
+		if len(strs) > 2 {
+			return spdx.DocElementID{}, fmt.Errorf("more than one colon found")
+		}
+
+		// trim the prefix and confirm non-empty
+		docRefID = strings.TrimPrefix(strs[0], "DocumentRef-")
+		if docRefID == "" {
+			return spdx.DocElementID{}, fmt.Errorf("document identifier has nothing after prefix")
+		}
+		// and use remainder for element ID parsing
+		idStr = strs[1]
+	}
+
+	// check prefix to confirm it's got the right prefix for element IDs
+	if !strings.HasPrefix(idStr, "SPDXRef-") {
+		return spdx.DocElementID{}, fmt.Errorf("missing SPDXRef- prefix for element identifier")
+	}
+
+	// make sure no colons are present
+	if strings.Contains(idStr, ":") {
+		// we know this means there was no DocumentRef- prefix, because
+		// we would have handled multiple colons above if it was
+		return spdx.DocElementID{}, fmt.Errorf("invalid colon in element identifier")
+	}
+
+	// trim the prefix and confirm non-empty
+	eltRefID := strings.TrimPrefix(idStr, "SPDXRef-")
+	if eltRefID == "" {
+		return spdx.DocElementID{}, fmt.Errorf("element identifier has nothing after prefix")
+	}
+
+	// we're good
+	return spdx.DocElementID{DocumentRefID: docRefID, ElementRefID: spdx.ElementID(eltRefID)}, nil
+}
+
+// ExtractElementID is used to extract SPDXRef values only from an SPDX
+// Identifier which can point to this document only. Use ExtractDocElementID
+// for parsing IDs that can refer either to this document or a different one.
+func ExtractElementID(value string) (spdx.ElementID, error) {
+	// check prefix to confirm it's got the right prefix for element IDs
+	if !strings.HasPrefix(value, "SPDXRef-") {
+		return spdx.ElementID(""), fmt.Errorf("missing SPDXRef- prefix for element identifier")
+	}
+
+	// make sure no colons are present
+	if strings.Contains(value, ":") {
+		return spdx.ElementID(""), fmt.Errorf("invalid colon in element identifier")
+	}
+
+	// trim the prefix and confirm non-empty
+	eltRefID := strings.TrimPrefix(value, "SPDXRef-")
+	if eltRefID == "" {
+		return spdx.ElementID(""), fmt.Errorf("element identifier has nothing after prefix")
+	}
+
+	// we're good
+	return spdx.ElementID(eltRefID), nil
+}
+
+// ExtractSubs is used to extract key / value from embedded substrings. The
+// value is split at the first occurrence of sep and both halves are trimmed of
+// surrounding whitespace.
+// returns subkey, subvalue, nil if no error, or "", "", error otherwise
+func ExtractSubs(value string, sep string) (string, string, error) {
+	// parse the value to see if it's a valid subvalue format
+	sp := strings.SplitN(value, sep, 2)
+	if len(sp) == 1 {
+		return "", "", fmt.Errorf("invalid subvalue format for %s (no %s found)", value, sep)
+	}
+
+	subkey := strings.TrimSpace(sp[0])
+	subvalue := strings.TrimSpace(sp[1])
+
+	return subkey, subvalue, nil
+}
\ No newline at end of file
diff --git a/rdfloader/rdfloader.go b/rdfloader/rdfloader.go
new file mode 100644
index 0000000..26a0f01
--- /dev/null
+++ b/rdfloader/rdfloader.go
@@ -0,0 +1,20 @@
+package rdfloader
+
+import (
+	"github.com/RishabhBhatnagar/gordf/rdfloader"
+	"github.com/spdx/tools-golang/rdfloader/parser2v2"
+	"github.com/spdx/tools-golang/spdx"
+	"io"
+)
+
+// Load2_2 takes in a file Reader and returns the pertaining spdx document
+// or the error if any is encountered while setting the doc.
+// The content is first parsed by gordf's LoadFromReaderObject and the result
+// is then converted by parser2v2.LoadFromGoRDFParser.
+func Load2_2(content io.Reader) (*spdx.Document2_2, error) {
+	var rdfParserObj, err = rdfloader.LoadFromReaderObject(content)
+	if err != nil {
+		return nil, err
+	}
+
+	doc, err := parser2v2.LoadFromGoRDFParser(rdfParserObj)
+	return doc, err
+}
-- 
cgit v1.2.3