Options to allow unicode character in identifier names (#4508)

* Added option to allow unicode identifiers in class names, method names etc.
Added option to allow keeping of underscore characters in class names, methods names, etc.

Unicode identifiers are supported by some languages, like Java, but the codegen will remove all non ASCII letters by default. Users might want to separate different parts of names using extended punctuation connector characters, like "_, ‿, ⁀, ⁔, ・, ︳, ︴, ﹍, ﹎, ﹏, _, ・".
Underscores cannot be used to separate different parts of the name as they're removed by default by the codegen, the second option allows underscore characters if the user so desires.
Both options can be used separately if needed, for example one might allow extended punctuation connector characters, but still disallow underscore.

* Added new command line options to all required unit tests.

* Added KEEP_UNDERSCORES and ALLOW_UNICODE_IDENTIFIERS to Bash tests.

* When KEEP_UNDERSCORES is set don't camelize the names, keep the identifier case as is (you probably don't want camel case + snake case, just snake case when KEEP_UNDERSCORES is set).
Added unit tests to verify how the case is computed for various scenarios.

* Reworked pull request to only include changes related to supporting unicode characters in identifiers (removed references to keep underscores).

* These methods and classes can be static again.
This commit is contained in:
eblis
2017-01-20 08:42:08 +02:00
committed by wing328
parent 029728d851
commit 3b3f2d2fd6
41 changed files with 154 additions and 8 deletions

View File

@@ -12,6 +12,8 @@ public class CodegenConstants {
public static final String TEMPLATE_DIR = "templateDir";
public static final String ALLOW_UNICODE_IDENTIFIERS = "allowUnicodeIdentifiers";
public static final String ALLOW_UNICODE_IDENTIFIERS_DESC = "boolean, toggles whether unicode identifiers are allowed in names or not, default is false";
public static final String INVOKER_PACKAGE = "invokerPackage";
public static final String INVOKER_PACKAGE_DESC = "root package for generated code";

View File

@@ -107,6 +107,7 @@ public class DefaultCodegen {
protected String library;
protected Boolean sortParamsByRequiredFlag = true;
protected Boolean ensureUniqueParams = true;
protected Boolean allowUnicodeIdentifiers = false;
protected String gitUserId, gitRepoId, releaseNote;
protected String httpUserAgent;
protected Boolean hideGenerationTimestamp = true;
@@ -144,6 +145,11 @@ public class DefaultCodegen {
.get(CodegenConstants.ENSURE_UNIQUE_PARAMS).toString()));
}
if (additionalProperties.containsKey(CodegenConstants.ALLOW_UNICODE_IDENTIFIERS)) {
this.setAllowUnicodeIdentifiers(Boolean.valueOf(additionalProperties
.get(CodegenConstants.ALLOW_UNICODE_IDENTIFIERS).toString()));
}
if(additionalProperties.containsKey(CodegenConstants.MODEL_NAME_PREFIX)){
this.setModelNamePrefix((String) additionalProperties.get(CodegenConstants.MODEL_NAME_PREFIX));
}
@@ -151,7 +157,6 @@ public class DefaultCodegen {
if(additionalProperties.containsKey(CodegenConstants.MODEL_NAME_SUFFIX)){
this.setModelNameSuffix((String) additionalProperties.get(CodegenConstants.MODEL_NAME_SUFFIX));
}
}
// override with any special post-processing for all models
@@ -582,6 +587,10 @@ public class DefaultCodegen {
this.ensureUniqueParams = ensureUniqueParams;
}
public void setAllowUnicodeIdentifiers(Boolean allowUnicodeIdentifiers) {
this.allowUnicodeIdentifiers = allowUnicodeIdentifiers;
}
/**
* Return the regular expression/JSON schema pattern (http://json-schema.org/latest/json-schema-validation.html#anchor33)
*
@@ -836,6 +845,10 @@ public class DefaultCodegen {
cliOptions.add(CliOption.newBoolean(CodegenConstants.ENSURE_UNIQUE_PARAMS, CodegenConstants
.ENSURE_UNIQUE_PARAMS_DESC).defaultValue(Boolean.TRUE.toString()));
//name formatting options
cliOptions.add(CliOption.newBoolean(CodegenConstants.ALLOW_UNICODE_IDENTIFIERS, CodegenConstants
.ALLOW_UNICODE_IDENTIFIERS_DESC).defaultValue(Boolean.FALSE.toString()));
// initialize special character mapping
initalizeSpecialCharacterMapping();
}
@@ -3257,7 +3270,14 @@ public class DefaultCodegen {
// remove everything else other than word, number and _
// $php_variable => php_variable
return name.replaceAll("[^a-zA-Z0-9_]", "");
if (allowUnicodeIdentifiers) { //could be converted to a single line with ?: operator
name = Pattern.compile("\\W", Pattern.UNICODE_CHARACTER_CLASS).matcher(name).replaceAll("");
}
else {
name = name.replaceAll("\\W", "");
}
return name;
}
/**

View File

@@ -1,6 +1,9 @@
package io.swagger.codegen.languages;
import com.google.common.collect.ImmutableMap;
import com.sun.org.apache.bcel.internal.classfile.Code;
import io.swagger.codegen.CodegenConstants;
import io.swagger.codegen.CodegenType;
import io.swagger.codegen.CodegenModel;
@@ -141,6 +144,10 @@ public class CSharpClientCodegen extends AbstractCSharpCodegen {
CodegenConstants.NON_PUBLIC_API_DESC,
this.nonPublicApi);
addSwitch(CodegenConstants.ALLOW_UNICODE_IDENTIFIERS,
CodegenConstants.ALLOW_UNICODE_IDENTIFIERS_DESC,
this.allowUnicodeIdentifiers);
regexModifiers = new HashMap<Character, String>();
regexModifiers.put('i', "IgnoreCase");
regexModifiers.put('m', "Multiline");