[avro-schema] fix enum logic (sanitize) (#19549)

This commit is contained in:
Joscha Feth 2024-09-12 10:00:19 +01:00 committed by GitHub
parent 5c6b8f3b16
commit cd349dc5ea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 133 additions and 0 deletions

View File

@ -0,0 +1,4 @@
generatorName: avro-schema
outputDir: samples/openapi3/schema/valid-enums/avro-schema-enum
inputSpec: modules/openapi-generator/src/test/resources/3_0/avro-schema/valid-enums.yaml
templateDir: modules/openapi-generator/src/main/resources/avro-schema

View File

@ -25,12 +25,20 @@ import org.openapitools.codegen.model.ModelsMap;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import static org.openapitools.codegen.utils.StringUtils.camelize;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class AvroSchemaCodegen extends DefaultCodegen implements CodegenConfig {
private final Logger LOGGER = LoggerFactory.getLogger(AvroSchemaCodegen.class);
private static final String AVRO = "avro-schema";
protected String packageName = "model";
@ -148,4 +156,44 @@ public class AvroSchemaCodegen extends DefaultCodegen implements CodegenConfig {
return input;
}
/**
 * Builds the enum variables for an Avro enum after turning every raw value into a
 * valid, unique Avro symbol.
 *
 * <p>Each value is converted to its string form, sanitized to match the Avro name
 * grammar, and de-duplicated (sanitizing can map distinct inputs such as
 * {@code coll-ision} and {@code coll_ision} onto the same symbol) before delegating
 * to the parent implementation.
 *
 * @param values   raw enum values from the schema; {@code null} entries are skipped
 * @param dataType data type of the enum, passed through unchanged to the parent
 * @return the enum variables produced by the parent implementation for the sanitized values
 */
@Override
protected List<Map<String, Object>> buildEnumVars(List<Object> values, String dataType) {
    List<Object> sanitizedValues = values.stream()
            // A null entry cannot become an Avro symbol; skipping it avoids a
            // NullPointerException from Object::toString below.
            .filter(value -> value != null)
            .map(Object::toString)
            .map(this::sanitizeEnumValue)
            .collect(Collectors.toList());
    removeEnumValueCollisions(sanitizedValues);
    return super.buildEnumVars(sanitizedValues, dataType);
}
/**
 * Converts an arbitrary enum value into a valid Avro enum symbol.
 *
 * <p>Valid enums in Avro need to adhere to {@code [A-Za-z_][A-Za-z0-9_]*}.
 * See https://avro.apache.org/docs/1.12.0/specification/#enums
 *
 * @param value the raw enum value
 * @return the value with every character outside {@code [A-Za-z0-9_]} replaced by an
 *         underscore, prefixed with an underscore when it would otherwise start with a
 *         digit, or {@code "_"} when the input is empty
 */
private String sanitizeEnumValue(String value) {
    // Replace any character that is not an ASCII letter, digit or underscore.
    String sanitizedValue = value.replaceAll("[^A-Za-z0-9_]", "_");
    if (sanitizedValue.isEmpty()) {
        // The grammar requires at least one character, so an empty symbol is invalid.
        return "_";
    }
    // After the replacement above only ASCII digits can make the first character invalid.
    char first = sanitizedValue.charAt(0);
    if (first >= '0' && first <= '9') {
        return "_" + sanitizedValue;
    }
    return sanitizedValue;
}
/**
 * Renames duplicate entries of {@code values} in place so that later duplicates get a
 * new name while the first occurrence keeps the original one.
 *
 * <p>The list is walked from the last element to the first: whenever the current entry
 * occurs more than once in the list, it is replaced by the result of
 * {@link #getUniqueEnumValue(String, List)}.
 *
 * @param values mutable list of enum values; entries are replaced in place
 */
private void removeEnumValueCollisions(List<Object> values) {
    // Iterating backwards renames the trailing duplicates first, so the earliest
    // occurrence is the one left untouched.
    for (int i = values.size() - 1; i >= 0; i--) {
        final String value = values.get(i).toString();
        if (Collections.frequency(values, value) > 1) {
            final String uniqueEnumValue = getUniqueEnumValue(value, values);
            LOGGER.debug("Changing duplicate enumeration value from {} to {}", value, uniqueEnumValue);
            values.set(i, uniqueEnumValue);
        }
    }
}
/**
 * Returns a name for {@code value} that does not clash with any entry of {@code values}.
 *
 * <p>If {@code value} occurs at most once in the list it is returned unchanged.
 * Otherwise the number of occurrences is appended as a suffix, and the suffix is
 * incremented for as long as the resulting candidate is already present in the list.
 *
 * @param value  the enum value to make unique
 * @param values the enum values to check against
 * @return {@code value} itself when it is not duplicated, otherwise {@code value}
 *         followed by a numeric suffix such that the result is absent from {@code values}
 */
private String getUniqueEnumValue(String value, List<Object> values) {
    long occurrences = values.stream().filter(v -> v.equals(value)).count();
    if (occurrences <= 1) {
        return value;
    }
    long suffix = occurrences;
    String candidate = value + suffix;
    // A candidate that already appears even once would create a new duplicate,
    // so keep counting up until the name is free.
    while (values.contains(candidate)) {
        suffix++;
        candidate = value + suffix;
    }
    return candidate;
}
}

View File

@ -0,0 +1,30 @@
openapi: 3.0.0
info:
version: 1.0.0
title: Test for valid enums
paths:
/test:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/Sample'
components:
schemas:
Sample:
properties:
type:
enum:
- 'a'
- 'b'
# This enum is invalid for Avro schemas, as it contains a `-`
- 'uh-oh'
# This next one starts with a number, which is invalid for Avro schemas
- '0h_oh'
# The next two are to make sure collisions are resolved properly
- 'coll-ision'
- 'coll_ision'
type: 'string'

View File

@ -0,0 +1,23 @@
# OpenAPI Generator Ignore
# Generated by openapi-generator https://github.com/openapitools/openapi-generator
# Use this file to prevent files from being overwritten by the generator.
# The patterns follow closely to .gitignore or .dockerignore.
# As an example, the C# client generator defines ApiClient.cs.
# You can make changes and tell OpenAPI Generator to ignore just this file by uncommenting the following line:
#ApiClient.cs
# You can match any string of characters against a directory, file or extension with a single asterisk (*):
#foo/*/qux
# The above matches foo/bar/qux and foo/baz/qux, but not foo/bar/baz/qux
# You can recursively match patterns against a directory, file or extension with a double asterisk (**):
#foo/**/qux
# This matches foo/bar/qux, foo/baz/qux, and foo/bar/baz/qux
# You can also negate patterns with an exclamation (!).
# For example, you can ignore all files in a docs folder with the file extension .md:
#docs/*.md
# Then explicitly reverse the ignore rule for a single file:
#!docs/README.md

View File

@ -0,0 +1 @@
Sample.avsc

View File

@ -0,0 +1 @@
7.9.0-SNAPSHOT

View File

@ -0,0 +1,26 @@
{
"namespace": "model",
"type": "record",
"doc": "",
"name": "Sample",
"fields": [
{
"name": "type",
"type": ["null", {
"type": "enum",
"name": "Sample_type",
"symbols": [
"a",
"b",
"uh_oh",
"_0h_oh",
"coll_ision",
"coll_ision2"
]
}],
"doc": "",
"default": null
}
]
}