Skip to content
This repository has been archived by the owner on Jun 20, 2024. It is now read-only.

Commit

Permalink
Use comments as description (#16)
Browse files Browse the repository at this point in the history
* Add failing test for desired functionality (using comments in the .proto file for each JSONSchema's description field)

* Move .proto => FileDescriptorSet loader into its own helper function so multiple tests can use it.

* Add sourceCodeInfo "indexer" (requires "--include_source_info" protoc flag to work)

You give it a list of FileDescriptor, it indexes all of the SourceCodeInfo
locations by what they reference and gives back a dictionary/registry that
can be used to look up source info for a message/field/enum/etc definition.

Example:
  fileDescriptors := ...
  fieldDescriptor := ...
  srcInfo := newSourceCodeInfo(fileDescriptors)
  fieldSrc := srcInfo.GetField(fieldDescriptor)
  println(fieldSrc.LeadingComments)

* Populate schema description from .proto file comments (requires "--include_source_info" protoc flag to work)
  • Loading branch information
ceason authored and chrusty committed Oct 6, 2019
1 parent 20d30a1 commit 277f799
Show file tree
Hide file tree
Showing 8 changed files with 278 additions and 30 deletions.
7 changes: 7 additions & 0 deletions internal/converter/converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ type Converter struct {
DisallowBigIntsAsStrings bool
UseProtoAndJSONFieldnames bool
logger *logrus.Logger
sourceInfo *sourceCodeInfo
}

// New returns a configured *Converter:
Expand Down Expand Up @@ -79,6 +80,11 @@ func (c *Converter) convertEnumType(enum *descriptor.EnumDescriptorProto) (jsons
Version: jsonschema.Version,
}

// Generate a description from src comments (if available)
if src := c.sourceInfo.GetEnum(enum); src != nil {
jsonSchemaType.Description = formatDescription(src)
}

// Allow both strings and integers:
jsonSchemaType.OneOf = append(jsonSchemaType.OneOf, &jsonschema.Type{Type: "string"})
jsonSchemaType.OneOf = append(jsonSchemaType.OneOf, &jsonschema.Type{Type: "integer"})
Expand Down Expand Up @@ -178,6 +184,7 @@ func (c *Converter) convert(req *plugin.CodeGeneratorRequest) (*plugin.CodeGener
generateTargets[file] = true
}

c.sourceInfo = newSourceCodeInfo(req.GetProtoFile())
res := &plugin.CodeGeneratorResponse{}
for _, file := range req.GetProtoFile() {
for _, msg := range file.GetMessageType() {
Expand Down
73 changes: 43 additions & 30 deletions internal/converter/converter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,10 @@ import (
"github.com/golang/protobuf/protoc-gen-go/descriptor"
plugin "github.com/golang/protobuf/protoc-gen-go/plugin"
"github.com/sirupsen/logrus"
log "github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
)

var (
protocBinary = "/bin/protoc"
sampleProtoDirectory = "testdata/proto"
sampleProtos = make(map[string]sampleProto)
)
Expand All @@ -33,13 +31,11 @@ type sampleProto struct {

func TestGenerateJsonSchema(t *testing.T) {

// Make sure we have "protoc" installed and available:
testForProtocBinary(t)

// Configure the list of sample protos to test, and their expected JSON-Schemas:
configureSampleProtos()

// Convert the protos, compare the results against the expected JSON-Schemas:
testConvertSampleProto(t, sampleProtos["Comments"])
testConvertSampleProto(t, sampleProtos["ArrayOfMessages"])
testConvertSampleProto(t, sampleProtos["ArrayOfObjects"])
testConvertSampleProto(t, sampleProtos["ArrayOfPrimitives"])
Expand All @@ -55,16 +51,6 @@ func TestGenerateJsonSchema(t *testing.T) {
testConvertSampleProto(t, sampleProtos["Maps"])
}

func testForProtocBinary(t *testing.T) {
path, err := exec.LookPath("protoc")
if err != nil {
assert.NoError(t, err, "Can't find 'protoc' binary in $PATH")
} else {
protocBinary = path
log.Infof("Found 'protoc' binary (%v)", protocBinary)
}
}

func testConvertSampleProto(t *testing.T, sampleProto sampleProto) {

// Make a Logrus logger:
Expand All @@ -79,21 +65,7 @@ func testConvertSampleProto(t *testing.T, sampleProto sampleProto) {

// Open the sample proto file:
sampleProtoFileName := fmt.Sprintf("%v/%v", sampleProtoDirectory, sampleProto.ProtoFileName)

// Prepare to run the "protoc" command (generates a CodeGeneratorRequest):
protocCommand := exec.Command(protocBinary, "--descriptor_set_out=/dev/stdout", "--include_imports", fmt.Sprintf("--proto_path=%v", sampleProtoDirectory), sampleProtoFileName)
var protocCommandOutput bytes.Buffer
errChan := &bytes.Buffer{}
protocCommand.Stdout = &protocCommandOutput
protocCommand.Stderr = errChan
// Run the command:
err := protocCommand.Run()
assert.NoError(t, err, "Unable to prepare a codeGeneratorRequest using protoc (%v) for sample proto file (%v) (%s)", protocBinary, sampleProtoFileName, strings.TrimSpace(errChan.String()))

// Unmarshal the output from the protoc command (should be a "FileDescriptorSet"):
fileDescriptorSet := new(descriptor.FileDescriptorSet)
err = proto.Unmarshal(protocCommandOutput.Bytes(), fileDescriptorSet)
assert.NoError(t, err, "Unable to unmarshal proto FileDescriptorSet for sample proto file (%v)", sampleProtoFileName)
fileDescriptorSet := mustReadProtoFiles(t, sampleProtoDirectory, sampleProto.ProtoFileName)

// Prepare a request:
codeGeneratorRequest := plugin.CodeGeneratorRequest{
Expand Down Expand Up @@ -220,4 +192,45 @@ func configureSampleProtos() {
FilesToGenerate: []string{"Maps.proto"},
ProtoFileName: "Maps.proto",
}

// Comments:
sampleProtos["Comments"] = sampleProto{
AllowNullValues: false,
ExpectedJSONSchema: []string{testdata.MessageWithComments},
FilesToGenerate: []string{"MessageWithComments.proto"},
ProtoFileName: "MessageWithComments.proto",
}
}

// Load the specified .proto files into a FileDescriptorSet. Any errors in loading/parsing will
// immediately fail the test.
func mustReadProtoFiles(t *testing.T, includePath string, filenames ...string) *descriptor.FileDescriptorSet {
protocBinary, err := exec.LookPath("protoc")
if err != nil {
t.Fatalf("Can't find 'protoc' binary in $PATH: %s", err.Error())
}

// Use protoc to output descriptor info for the specified .proto files.
var args []string
args = append(args, "--descriptor_set_out=/dev/stdout")
args = append(args, "--include_source_info")
args = append(args, "--include_imports")
args = append(args, "--proto_path="+includePath)
args = append(args, filenames...)
cmd := exec.Command(protocBinary, args...)
stdoutBuf := bytes.Buffer{}
stderrBuf := bytes.Buffer{}
cmd.Stdout = &stdoutBuf
cmd.Stderr = &stderrBuf
err = cmd.Run()
if err != nil {
t.Fatalf("failed to load descriptor set (%s): %s: %s",
strings.Join(cmd.Args, " "), err.Error(), stderrBuf.String())
}
fds := &descriptor.FileDescriptorSet{}
err = proto.Unmarshal(stdoutBuf.Bytes(), fds)
if err != nil {
t.Fatalf("failed to parse protoc output as FileDescriptorSet: %s", err.Error())
}
return fds
}
116 changes: 116 additions & 0 deletions internal/converter/sourcecodeinfo.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package converter

import (
"github.com/golang/protobuf/proto"
"github.com/golang/protobuf/protoc-gen-go/descriptor"
)

// Protobuf tag values for relevant message fields. Full list here:
// https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/descriptor.proto
const (
tag_FileDescriptor_messageType int32 = 4
tag_FileDescriptor_enumType int32 = 5
tag_Descriptor_field int32 = 2
tag_Descriptor_nestedType int32 = 3
tag_Descriptor_enumType int32 = 4
tag_Descriptor_oneofDecl int32 = 8
tag_EnumDescriptor_value int32 = 2
)

type sourceCodeInfo struct {
lookup map[proto.Message]*descriptor.SourceCodeInfo_Location
}

func (s sourceCodeInfo) GetMessage(m *descriptor.DescriptorProto) *descriptor.SourceCodeInfo_Location {
return s.lookup[m]
}

func (s sourceCodeInfo) GetField(f *descriptor.FieldDescriptorProto) *descriptor.SourceCodeInfo_Location {
return s.lookup[f]
}

func (s sourceCodeInfo) GetEnum(e *descriptor.EnumDescriptorProto) *descriptor.SourceCodeInfo_Location {
return s.lookup[e]
}

func (s sourceCodeInfo) GetEnumValue(e *descriptor.EnumValueDescriptorProto) *descriptor.SourceCodeInfo_Location {
return s.lookup[e]
}

func newSourceCodeInfo(fs []*descriptor.FileDescriptorProto) *sourceCodeInfo {
// For each source location in the provided files
// - resolve the (annoyingly) encoded path to its message/field/service/enum/etc definition
// - store the source info by its resolved definition
lookup := map[proto.Message]*descriptor.SourceCodeInfo_Location{}
for _, f := range fs {
for _, loc := range f.GetSourceCodeInfo().GetLocation() {
declaration := getDefinitionAtPath(f, loc.Path)
if declaration != nil {
lookup[declaration] = loc
}
}
}
return &sourceCodeInfo{lookup}
}

// Resolve a protobuf "file-source path" to its associated definition (eg message/field/enum/etc).
// Note that some paths don't point to definitions (some reference subcomponents like name, type,
// field #, etc) and will therefore return nil.
func getDefinitionAtPath(file *descriptor.FileDescriptorProto, path []int32) proto.Message {
// The way protobuf encodes "file-source path" is a little opaque/tricky;
// this doc describes how it works:
// https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/descriptor.proto#L730

// Starting at the root of the file descriptor, traverse its object graph by following the
// specified path (and updating our position/state at each step) until either:
// - we reach the definition referenced by the path (and return it)
// - we hit a dead end because the path references a grammar element more granular than a
// definition (so we return nil)
var pos proto.Message = file
for step := 0; step < len(path); step++ {
switch p := pos.(type) {
case *descriptor.FileDescriptorProto:
switch path[step] {
case tag_FileDescriptor_messageType:
step++
pos = p.MessageType[path[step]]
case tag_FileDescriptor_enumType:
step++
pos = p.EnumType[path[step]]
default:
return nil // ignore all other types
}

case *descriptor.DescriptorProto:
switch path[step] {
case tag_Descriptor_field:
step++
pos = p.Field[path[step]]
case tag_Descriptor_nestedType:
step++
pos = p.NestedType[path[step]]
case tag_Descriptor_enumType:
step++
pos = p.EnumType[path[step]]
case tag_Descriptor_oneofDecl:
step++
pos = p.OneofDecl[path[step]]
default:
return nil // ignore all other types
}

case *descriptor.EnumDescriptorProto:
switch path[step] {
case tag_EnumDescriptor_value:
step++
pos = p.Value[path[step]]
default:
return nil // ignore all other types
}

default:
return nil // ignore all other types
}
}
return pos
}
52 changes: 52 additions & 0 deletions internal/converter/sourcecodeinfo_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package converter

import (
"github.com/golang/protobuf/protoc-gen-go/descriptor"
"testing"
)

func TestSourceInfoLookup(t *testing.T) {
// Read in the test file & get references to the things we've declared.
// Note that the hardcoded indexes must reflect the declaration order in
// the .proto file.
fds := mustReadProtoFiles(t, sampleProtoDirectory, "MessageWithComments.proto")
protoFile := fds.File[0]
msgWithComments := protoFile.MessageType[0]
msgWithComments_name1 := msgWithComments.Field[0]

// Create an instance of our thing and test that it returns the expected
// source data for each of our above declarations.
src := newSourceCodeInfo(fds.File)
assertCommentsMatch(t, src.GetMessage(msgWithComments), &descriptor.SourceCodeInfo_Location{
LeadingComments: strPtr(" This is a message level comment and talks about what this message is and why you should care about it!\n"),
})
assertCommentsMatch(t, src.GetField(msgWithComments_name1), &descriptor.SourceCodeInfo_Location{
LeadingComments: strPtr(" This field is supposed to represent blahblahblah\n"),
})
}

func assertCommentsMatch(t *testing.T, actual, expected *descriptor.SourceCodeInfo_Location) {
if len(actual.LeadingDetachedComments) != len(expected.LeadingDetachedComments) {
t.Fatalf("Wrong value for LeadingDetachedComments.\n got: %v\n want: %v",
actual.LeadingDetachedComments, expected.LeadingDetachedComments)
}
for i := 0; i < len(actual.LeadingDetachedComments); i++ {
if actual.LeadingDetachedComments[i] != expected.LeadingDetachedComments[i] {
t.Fatalf("Wrong value for LeadingDetachedComments.\n got: %v\n want: %v",
actual.LeadingDetachedComments, expected.LeadingDetachedComments)
}
}
if actual.GetTrailingComments() != expected.GetTrailingComments() {
t.Fatalf("Wrong value for TrailingComments.\n got: %q\n want: %q",
actual.GetTrailingComments(), expected.GetTrailingComments())
}
if actual.GetLeadingComments() != expected.GetLeadingComments() {
t.Fatalf("Wrong value for LeadingComments.\n got: %q\n want: %q",
actual.GetLeadingComments(), expected.GetLeadingComments())
}
}

// Go doesn't have syntax for addressing a string literal, so this is the next best thing.
func strPtr(s string) *string {
return &s
}
14 changes: 14 additions & 0 deletions internal/converter/testdata/message_with_comments.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package testdata

const MessageWithComments = `{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"name1": {
"type": "string",
"description": "This field is supposed to represent blahblahblah"
}
},
"additionalProperties": true,
"type": "object",
"description": "This is a message level comment and talks about what this message is and why you should care about it!"
}`
9 changes: 9 additions & 0 deletions internal/converter/testdata/proto/MessageWithComments.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
syntax = "proto3";
package samples;

// This is a message level comment and talks about what this message is and why you should care about it!
message MessageWithComments {

// This field is supposed to represent blahblahblah
string name1 = 1;
}
25 changes: 25 additions & 0 deletions internal/converter/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ func (c *Converter) convertField(curPkg *ProtoPackage, desc *descriptor.FieldDes
Properties: make(map[string]*jsonschema.Type),
}

// Generate a description from src comments (if available)
if src := c.sourceInfo.GetField(desc); src != nil {
jsonSchemaType.Description = formatDescription(src)
}

// Switch the types, and pick a JSONSchema equivalent:
switch desc.GetType() {
case descriptor.FieldDescriptorProto_TYPE_DOUBLE,
Expand Down Expand Up @@ -260,6 +265,10 @@ func (c *Converter) convertMessageType(curPkg *ProtoPackage, msg *descriptor.Des
Properties: make(map[string]*jsonschema.Type),
Version: jsonschema.Version,
}
// Generate a description from src comments (if available)
if src := c.sourceInfo.GetMessage(msg); src != nil {
jsonSchemaType.Description = formatDescription(src)
}

// Optionally allow NULL values:
if c.AllowNullValues {
Expand Down Expand Up @@ -293,3 +302,19 @@ func (c *Converter) convertMessageType(curPkg *ProtoPackage, msg *descriptor.Des
}
return jsonSchemaType, nil
}

func formatDescription(sl *descriptor.SourceCodeInfo_Location) string {
var lines []string
for _, str := range sl.GetLeadingDetachedComments() {
if s := strings.TrimSpace(str); s != "" {
lines = append(lines, s)
}
}
if s := strings.TrimSpace(sl.GetLeadingComments()); s != "" {
lines = append(lines, s)
}
if s := strings.TrimSpace(sl.GetTrailingComments()); s != "" {
lines = append(lines, s)
}
return strings.Join(lines, "\n\n")
}
12 changes: 12 additions & 0 deletions jsonschemas/MessageWithComments.jsonschema
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"name1": {
"type": "string",
"description": "This field is supposed to represent blahblahblah"
}
},
"additionalProperties": true,
"type": "object",
"description": "This is a message level comment and talks about what this message is and why you should care about it!"
}

0 comments on commit 277f799

Please sign in to comment.