From 277f799109addd12cce6509215646d664bc75c5c Mon Sep 17 00:00:00 2001 From: Chris Eason Date: Sun, 6 Oct 2019 08:49:35 -0400 Subject: [PATCH] Use comments as description (#16) * Add failing test for desired functionality (using comments in the .proto file for each JSONSchema's description field) * Move .proto => FileDescriptorSet loader into its own helper function so multiple tests can use it. * Add sourceCodeInfo "indexer" (requires "--include_source_info" protoc flag to work) You give it a list of FileDescriptor, it indexes all of the SourceCodeInfo locations by what they reference and gives back a dictionary/registry that can be used to look up source info for a message/field/enum/etc definition. Example: fileDescriptors := ... fieldDescriptor := ... srcInfo := newSourceCodeInfo(fileDescriptors) fieldSrc := srcInfo.GetField(fieldDescriptor) println(fieldSrc.LeadingComments) * Populate schema description from .proto file comments (requires "--include_source_info" protoc flag to work) --- internal/converter/converter.go | 7 ++ internal/converter/converter_test.go | 73 ++++++----- internal/converter/sourcecodeinfo.go | 116 ++++++++++++++++++ internal/converter/sourcecodeinfo_test.go | 52 ++++++++ .../testdata/message_with_comments.go | 14 +++ .../testdata/proto/MessageWithComments.proto | 9 ++ internal/converter/types.go | 25 ++++ jsonschemas/MessageWithComments.jsonschema | 12 ++ 8 files changed, 278 insertions(+), 30 deletions(-) create mode 100644 internal/converter/sourcecodeinfo.go create mode 100644 internal/converter/sourcecodeinfo_test.go create mode 100644 internal/converter/testdata/message_with_comments.go create mode 100644 internal/converter/testdata/proto/MessageWithComments.proto create mode 100644 jsonschemas/MessageWithComments.jsonschema diff --git a/internal/converter/converter.go b/internal/converter/converter.go index 902bda51..80f67615 100644 --- a/internal/converter/converter.go +++ b/internal/converter/converter.go @@ -22,6 +22,7 @@ type Converter struct { DisallowBigIntsAsStrings bool UseProtoAndJSONFieldnames bool logger *logrus.Logger + sourceInfo *sourceCodeInfo } // New returns a configured *Converter: @@ -79,6 +80,11 @@ func (c *Converter) convertEnumType(enum *descriptor.EnumDescriptorProto) (jsons Version: jsonschema.Version, } + // Generate a description from src comments (if available) + if src := c.sourceInfo.GetEnum(enum); src != nil { + jsonSchemaType.Description = formatDescription(src) + } + // Allow both strings and integers: jsonSchemaType.OneOf = append(jsonSchemaType.OneOf, &jsonschema.Type{Type: "string"}) jsonSchemaType.OneOf = append(jsonSchemaType.OneOf, &jsonschema.Type{Type: "integer"}) @@ -178,6 +184,7 @@ func (c *Converter) convert(req *plugin.CodeGeneratorRequest) (*plugin.CodeGener generateTargets[file] = true } + c.sourceInfo = newSourceCodeInfo(req.GetProtoFile()) res := &plugin.CodeGeneratorResponse{} for _, file := range req.GetProtoFile() { for _, msg := range file.GetMessageType() { diff --git a/internal/converter/converter_test.go b/internal/converter/converter_test.go index 2e155381..fb8b1d83 100644 --- a/internal/converter/converter_test.go +++ b/internal/converter/converter_test.go @@ -13,12 +13,10 @@ import ( "github.com/golang/protobuf/protoc-gen-go/descriptor" plugin "github.com/golang/protobuf/protoc-gen-go/plugin" "github.com/sirupsen/logrus" - log "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" ) var ( - protocBinary = "/bin/protoc" sampleProtoDirectory = "testdata/proto" sampleProtos = make(map[string]sampleProto) ) @@ -33,13 +31,11 @@ type sampleProto struct { func TestGenerateJsonSchema(t *testing.T) { - // Make sure we have "protoc" installed and available: - testForProtocBinary(t) - // Configure the list of sample protos to test, and their expected JSON-Schemas: configureSampleProtos() // Convert the protos, compare the results against the expected JSON-Schemas: + testConvertSampleProto(t, sampleProtos["Comments"]) testConvertSampleProto(t, sampleProtos["ArrayOfMessages"]) testConvertSampleProto(t, sampleProtos["ArrayOfObjects"]) testConvertSampleProto(t, sampleProtos["ArrayOfPrimitives"]) @@ -55,16 +51,6 @@ func TestGenerateJsonSchema(t *testing.T) { testConvertSampleProto(t, sampleProtos["Maps"]) } -func testForProtocBinary(t *testing.T) { - path, err := exec.LookPath("protoc") - if err != nil { - assert.NoError(t, err, "Can't find 'protoc' binary in $PATH") - } else { - protocBinary = path - log.Infof("Found 'protoc' binary (%v)", protocBinary) - } -} - func testConvertSampleProto(t *testing.T, sampleProto sampleProto) { // Make a Logrus logger: @@ -79,21 +65,7 @@ func testConvertSampleProto(t *testing.T, sampleProto sampleProto) { // Open the sample proto file: sampleProtoFileName := fmt.Sprintf("%v/%v", sampleProtoDirectory, sampleProto.ProtoFileName) - - // Prepare to run the "protoc" command (generates a CodeGeneratorRequest): - protocCommand := exec.Command(protocBinary, "--descriptor_set_out=/dev/stdout", "--include_imports", fmt.Sprintf("--proto_path=%v", sampleProtoDirectory), sampleProtoFileName) - var protocCommandOutput bytes.Buffer - errChan := &bytes.Buffer{} - protocCommand.Stdout = &protocCommandOutput - protocCommand.Stderr = errChan - // Run the command: - err := protocCommand.Run() - assert.NoError(t, err, "Unable to prepare a codeGeneratorRequest using protoc (%v) for sample proto file (%v) (%s)", protocBinary, sampleProtoFileName, strings.TrimSpace(errChan.String())) - - // Unmarshal the output from the protoc command (should be a "FileDescriptorSet"): - fileDescriptorSet := new(descriptor.FileDescriptorSet) - err = proto.Unmarshal(protocCommandOutput.Bytes(), fileDescriptorSet) - assert.NoError(t, err, "Unable to unmarshal proto FileDescriptorSet for sample proto file (%v)", sampleProtoFileName) + fileDescriptorSet := mustReadProtoFiles(t, sampleProtoDirectory, sampleProto.ProtoFileName) // Prepare a request: codeGeneratorRequest := plugin.CodeGeneratorRequest{ @@ -220,4 +192,45 @@ func configureSampleProtos() { FilesToGenerate: []string{"Maps.proto"}, ProtoFileName: "Maps.proto", } + + // Comments: + sampleProtos["Comments"] = sampleProto{ + AllowNullValues: false, + ExpectedJSONSchema: []string{testdata.MessageWithComments}, + FilesToGenerate: []string{"MessageWithComments.proto"}, + ProtoFileName: "MessageWithComments.proto", + } +} + +// Load the specified .proto files into a FileDescriptorSet. Any errors in loading/parsing will +// immediately fail the test. +func mustReadProtoFiles(t *testing.T, includePath string, filenames ...string) *descriptor.FileDescriptorSet { + protocBinary, err := exec.LookPath("protoc") + if err != nil { + t.Fatalf("Can't find 'protoc' binary in $PATH: %s", err.Error()) + } + + // Use protoc to output descriptor info for the specified .proto files. + var args []string + args = append(args, "--descriptor_set_out=/dev/stdout") + args = append(args, "--include_source_info") + args = append(args, "--include_imports") + args = append(args, "--proto_path="+includePath) + args = append(args, filenames...) + cmd := exec.Command(protocBinary, args...) + stdoutBuf := bytes.Buffer{} + stderrBuf := bytes.Buffer{} + cmd.Stdout = &stdoutBuf + cmd.Stderr = &stderrBuf + err = cmd.Run() + if err != nil { + t.Fatalf("failed to load descriptor set (%s): %s: %s", + strings.Join(cmd.Args, " "), err.Error(), stderrBuf.String()) + } + fds := &descriptor.FileDescriptorSet{} + err = proto.Unmarshal(stdoutBuf.Bytes(), fds) + if err != nil { + t.Fatalf("failed to parse protoc output as FileDescriptorSet: %s", err.Error()) + } + return fds } diff --git a/internal/converter/sourcecodeinfo.go b/internal/converter/sourcecodeinfo.go new file mode 100644 index 00000000..566a25b1 --- /dev/null +++ b/internal/converter/sourcecodeinfo.go @@ -0,0 +1,116 @@ +package converter + +import ( + "github.com/golang/protobuf/proto" + "github.com/golang/protobuf/protoc-gen-go/descriptor" +) + +// Protobuf tag values for relevant message fields. Full list here: +// https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/descriptor.proto +const ( + tag_FileDescriptor_messageType int32 = 4 + tag_FileDescriptor_enumType int32 = 5 + tag_Descriptor_field int32 = 2 + tag_Descriptor_nestedType int32 = 3 + tag_Descriptor_enumType int32 = 4 + tag_Descriptor_oneofDecl int32 = 8 + tag_EnumDescriptor_value int32 = 2 +) + +type sourceCodeInfo struct { + lookup map[proto.Message]*descriptor.SourceCodeInfo_Location +} + +func (s sourceCodeInfo) GetMessage(m *descriptor.DescriptorProto) *descriptor.SourceCodeInfo_Location { + return s.lookup[m] +} + +func (s sourceCodeInfo) GetField(f *descriptor.FieldDescriptorProto) *descriptor.SourceCodeInfo_Location { + return s.lookup[f] +} + +func (s sourceCodeInfo) GetEnum(e *descriptor.EnumDescriptorProto) *descriptor.SourceCodeInfo_Location { + return s.lookup[e] +} + +func (s sourceCodeInfo) GetEnumValue(e *descriptor.EnumValueDescriptorProto) *descriptor.SourceCodeInfo_Location { + return s.lookup[e] +} + +func newSourceCodeInfo(fs []*descriptor.FileDescriptorProto) *sourceCodeInfo { + // For each source location in the provided files + // - resolve the (annoyingly) encoded path to its message/field/service/enum/etc definition + // - store the source info by its resolved definition + lookup := map[proto.Message]*descriptor.SourceCodeInfo_Location{} + for _, f := range fs { + for _, loc := range f.GetSourceCodeInfo().GetLocation() { + declaration := getDefinitionAtPath(f, loc.Path) + if declaration != nil { + lookup[declaration] = loc + } + } + } + return &sourceCodeInfo{lookup} +} + +// Resolve a protobuf "file-source path" to its associated definition (eg message/field/enum/etc). +// Note that some paths don't point to definitions (some reference subcomponents like name, type, +// field #, etc) and will therefore return nil. +func getDefinitionAtPath(file *descriptor.FileDescriptorProto, path []int32) proto.Message { + // The way protobuf encodes "file-source path" is a little opaque/tricky; + // this doc describes how it works: + // https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/descriptor.proto#L730 + + // Starting at the root of the file descriptor, traverse its object graph by following the + // specified path (and updating our position/state at each step) until either: + // - we reach the definition referenced by the path (and return it) + // - we hit a dead end because the path references a grammar element more granular than a + // definition (so we return nil) + var pos proto.Message = file + for step := 0; step < len(path); step++ { + switch p := pos.(type) { + case *descriptor.FileDescriptorProto: + switch path[step] { + case tag_FileDescriptor_messageType: + step++ + pos = p.MessageType[path[step]] + case tag_FileDescriptor_enumType: + step++ + pos = p.EnumType[path[step]] + default: + return nil // ignore all other types + } + + case *descriptor.DescriptorProto: + switch path[step] { + case tag_Descriptor_field: + step++ + pos = p.Field[path[step]] + case tag_Descriptor_nestedType: + step++ + pos = p.NestedType[path[step]] + case tag_Descriptor_enumType: + step++ + pos = p.EnumType[path[step]] + case tag_Descriptor_oneofDecl: + step++ + pos = p.OneofDecl[path[step]] + default: + return nil // ignore all other types + } + + case *descriptor.EnumDescriptorProto: + switch path[step] { + case tag_EnumDescriptor_value: + step++ + pos = p.Value[path[step]] + default: + return nil // ignore all other types + } + + default: + return nil // ignore all other types + } + } + return pos +} diff --git a/internal/converter/sourcecodeinfo_test.go b/internal/converter/sourcecodeinfo_test.go new file mode 100644 index 00000000..39e470ab --- /dev/null +++ b/internal/converter/sourcecodeinfo_test.go @@ -0,0 +1,52 @@ +package converter + +import ( + "github.com/golang/protobuf/protoc-gen-go/descriptor" + "testing" +) + +func TestSourceInfoLookup(t *testing.T) { + // Read in the test file & get references to the things we've declared. + // Note that the hardcoded indexes must reflect the declaration order in + // the .proto file. + fds := mustReadProtoFiles(t, sampleProtoDirectory, "MessageWithComments.proto") + protoFile := fds.File[0] + msgWithComments := protoFile.MessageType[0] + msgWithComments_name1 := msgWithComments.Field[0] + + // Create an instance of our thing and test that it returns the expected + // source data for each of our above declarations. + src := newSourceCodeInfo(fds.File) + assertCommentsMatch(t, src.GetMessage(msgWithComments), &descriptor.SourceCodeInfo_Location{ + LeadingComments: strPtr(" This is a message level comment and talks about what this message is and why you should care about it!\n"), + }) + assertCommentsMatch(t, src.GetField(msgWithComments_name1), &descriptor.SourceCodeInfo_Location{ + LeadingComments: strPtr(" This field is supposed to represent blahblahblah\n"), + }) +} + +func assertCommentsMatch(t *testing.T, actual, expected *descriptor.SourceCodeInfo_Location) { + if len(actual.LeadingDetachedComments) != len(expected.LeadingDetachedComments) { + t.Fatalf("Wrong value for LeadingDetachedComments.\n got: %v\n want: %v", + actual.LeadingDetachedComments, expected.LeadingDetachedComments) + } + for i := 0; i < len(actual.LeadingDetachedComments); i++ { + if actual.LeadingDetachedComments[i] != expected.LeadingDetachedComments[i] { + t.Fatalf("Wrong value for LeadingDetachedComments.\n got: %v\n want: %v", + actual.LeadingDetachedComments, expected.LeadingDetachedComments) + } + } + if actual.GetTrailingComments() != expected.GetTrailingComments() { + t.Fatalf("Wrong value for TrailingComments.\n got: %q\n want: %q", + actual.GetTrailingComments(), expected.GetTrailingComments()) + } + if actual.GetLeadingComments() != expected.GetLeadingComments() { + t.Fatalf("Wrong value for LeadingComments.\n got: %q\n want: %q", + actual.GetLeadingComments(), expected.GetLeadingComments()) + } +} + +// Go doesn't have syntax for addressing a string literal, so this is the next best thing. +func strPtr(s string) *string { + return &s +} diff --git a/internal/converter/testdata/message_with_comments.go b/internal/converter/testdata/message_with_comments.go new file mode 100644 index 00000000..b14b869b --- /dev/null +++ b/internal/converter/testdata/message_with_comments.go @@ -0,0 +1,14 @@ +package testdata + +const MessageWithComments = `{ + "$schema": "http://json-schema.org/draft-04/schema#", + "properties": { + "name1": { + "type": "string", + "description": "This field is supposed to represent blahblahblah" + } + }, + "additionalProperties": true, + "type": "object", + "description": "This is a message level comment and talks about what this message is and why you should care about it!" +}` diff --git a/internal/converter/testdata/proto/MessageWithComments.proto b/internal/converter/testdata/proto/MessageWithComments.proto new file mode 100644 index 00000000..43958911 --- /dev/null +++ b/internal/converter/testdata/proto/MessageWithComments.proto @@ -0,0 +1,9 @@ +syntax = "proto3"; +package samples; + +// This is a message level comment and talks about what this message is and why you should care about it! +message MessageWithComments { + + // This field is supposed to represent blahblahblah + string name1 = 1; +} diff --git a/internal/converter/types.go b/internal/converter/types.go index a085fc9e..dc9531d7 100644 --- a/internal/converter/types.go +++ b/internal/converter/types.go @@ -68,6 +68,11 @@ func (c *Converter) convertField(curPkg *ProtoPackage, desc *descriptor.FieldDes Properties: make(map[string]*jsonschema.Type), } + // Generate a description from src comments (if available) + if src := c.sourceInfo.GetField(desc); src != nil { + jsonSchemaType.Description = formatDescription(src) + } + // Switch the types, and pick a JSONSchema equivalent: switch desc.GetType() { case descriptor.FieldDescriptorProto_TYPE_DOUBLE, @@ -260,6 +265,10 @@ func (c *Converter) convertMessageType(curPkg *ProtoPackage, msg *descriptor.Des Properties: make(map[string]*jsonschema.Type), Version: jsonschema.Version, } + // Generate a description from src comments (if available) + if src := c.sourceInfo.GetMessage(msg); src != nil { + jsonSchemaType.Description = formatDescription(src) + } // Optionally allow NULL values: if c.AllowNullValues { @@ -293,3 +302,19 @@ func (c *Converter) convertMessageType(curPkg *ProtoPackage, msg *descriptor.Des } return jsonSchemaType, nil } + +func formatDescription(sl *descriptor.SourceCodeInfo_Location) string { + var lines []string + for _, str := range sl.GetLeadingDetachedComments() { + if s := strings.TrimSpace(str); s != "" { + lines = append(lines, s) + } + } + if s := strings.TrimSpace(sl.GetLeadingComments()); s != "" { + lines = append(lines, s) + } + if s := strings.TrimSpace(sl.GetTrailingComments()); s != "" { + lines = append(lines, s) + } + return strings.Join(lines, "\n\n") +} diff --git a/jsonschemas/MessageWithComments.jsonschema b/jsonschemas/MessageWithComments.jsonschema new file mode 100644 index 00000000..60e1744c --- /dev/null +++ b/jsonschemas/MessageWithComments.jsonschema @@ -0,0 +1,12 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "properties": { + "name1": { + "type": "string", + "description": "This field is supposed to represent blahblahblah" + } + }, + "additionalProperties": true, + "type": "object", + "description": "This is a message level comment and talks about what this message is and why you should care about it!" +} \ No newline at end of file