[core] Sanitize/underscore/camelize cache expiry (#5484)

The helper methods for sanitize/underscore/camelize were recently
updated to cache values in static maps. This would lead to a memory leak
in hosted environments as the maps had no cleanup/expiry.

This moves those cached entries to Caffeine caches that expire based on
last access. This retains the edge-case performance gains on very
large documents, without the memory leak in hosted or embedded
environments.
This commit is contained in:
Jim Schubert
2020-03-03 04:29:51 -05:00
committed by GitHub
parent c27d4001e1
commit 51cc7c2f2a
2 changed files with 196 additions and 143 deletions

View File

@@ -17,6 +17,9 @@
package org.openapitools.codegen;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.Ticker;
import com.google.common.base.CaseFormat;
import com.google.common.collect.ImmutableMap;
import com.samskivert.mustache.Mustache;
@@ -67,6 +70,7 @@ import org.slf4j.LoggerFactory;
import java.io.File;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -80,6 +84,10 @@ public class DefaultCodegen implements CodegenConfig {
public static FeatureSet DefaultFeatureSet;
// A cache of sanitized words. The sanitizeName() method is invoked many times with the same
// arguments; this cache is used to optimize performance.
private static Cache<SanitizeNameOptions, String> sanitizedNameCache;
static {
DefaultFeatureSet = FeatureSet.newBuilder()
.includeDataTypeFeatures(
@@ -123,6 +131,12 @@ public class DefaultCodegen implements CodegenConfig {
// PROTOBUF and Custom are generator specific
)
.build();
sanitizedNameCache = Caffeine.newBuilder()
.maximumSize(500)
.expireAfterAccess(10, TimeUnit.SECONDS)
.ticker(Ticker.systemTicker())
.build();
}
protected GeneratorMetadata generatorMetadata;
@@ -4505,21 +4519,16 @@ public class DefaultCodegen implements CodegenConfig {
return sanitizeName(name, removeCharRegEx, new ArrayList<String>());
}
// A cache of sanitized words. The sanitizeName() method is invoked many times with the same
// arguments, this cache is used to optimized performance.
private static Map<String, Map<String, Map<List<String>, String>>> sanitizedNames =
new HashMap<String, Map<String, Map<List<String>, String>>>();
/**
* Sanitize name (parameter, property, method, etc)
*
* @param name string to be sanitize
* @param removeCharRegEx a regex containing all char that will be removed
* @param exceptionList a list of matches which should not be sanitized (i.e expections)
* @param exceptionList a list of matches which should not be sanitized (i.e. exceptions)
* @return sanitized string
*/
@SuppressWarnings("static-method")
public String sanitizeName(String name, String removeCharRegEx, ArrayList<String> exceptionList) {
public String sanitizeName(final String name, String removeCharRegEx, ArrayList<String> exceptionList) {
// NOTE: performance wise, we should have written with 2 replaceAll to replace desired
// character with _ or empty character. Below aims to spell out different cases we've
// encountered so far and hopefully make it easier for others to add more special
@@ -4536,61 +4545,51 @@ public class DefaultCodegen implements CodegenConfig {
return "value";
}
Map<String, Map<List<String>, String>> m1 = sanitizedNames.get(name);
if (m1 == null) {
m1 = new HashMap<String, Map<List<String>, String>>();
sanitizedNames.put(name, m1);
}
Map<List<String>, String> m2 = m1.get(removeCharRegEx);
if (m2 == null) {
m2 = new HashMap<List<String>, String>();
m1.put(removeCharRegEx, m2);
}
List<String> l = Collections.unmodifiableList(exceptionList);
if (m2.containsKey(l)) {
return m2.get(l);
}
SanitizeNameOptions opts = new SanitizeNameOptions(name, removeCharRegEx, exceptionList);
// input[] => input
name = this.sanitizeValue(name, "\\[\\]", "", exceptionList);
return sanitizedNameCache.get(opts, sanitizeNameOptions -> {
String modifiable = sanitizeNameOptions.getName();
List<String> exceptions = sanitizeNameOptions.getExceptions();
// input[] => input
modifiable = this.sanitizeValue(modifiable, "\\[\\]", "", exceptions);
// input[a][b] => input_a_b
name = this.sanitizeValue(name, "\\[", "_", exceptionList);
name = this.sanitizeValue(name, "\\]", "", exceptionList);
// input[a][b] => input_a_b
modifiable = this.sanitizeValue(modifiable, "\\[", "_", exceptions);
modifiable = this.sanitizeValue(modifiable, "\\]", "", exceptions);
// input(a)(b) => input_a_b
name = this.sanitizeValue(name, "\\(", "_", exceptionList);
name = this.sanitizeValue(name, "\\)", "", exceptionList);
// input(a)(b) => input_a_b
modifiable = this.sanitizeValue(modifiable, "\\(", "_", exceptions);
modifiable = this.sanitizeValue(modifiable, "\\)", "", exceptions);
// input.name => input_name
name = this.sanitizeValue(name, "\\.", "_", exceptionList);
// input.name => input_name
modifiable = this.sanitizeValue(modifiable, "\\.", "_", exceptions);
// input-name => input_name
name = this.sanitizeValue(name, "-", "_", exceptionList);
// input-name => input_name
modifiable = this.sanitizeValue(modifiable, "-", "_", exceptions);
// a|b => a_b
name = this.sanitizeValue(name, "\\|", "_", exceptionList);
// a|b => a_b
modifiable = this.sanitizeValue(modifiable, "\\|", "_", exceptions);
// input name and age => input_name_and_age
name = this.sanitizeValue(name, " ", "_", exceptionList);
// input name and age => input_name_and_age
modifiable = this.sanitizeValue(modifiable, " ", "_", exceptions);
// /api/films/get => _api_films_get
// \api\films\get => _api_films_get
name = name.replaceAll("/", "_");
name = name.replaceAll("\\\\", "_");
// /api/films/get => _api_films_get
// \api\films\get => _api_films_get
modifiable = modifiable.replaceAll("/", "_");
modifiable = modifiable.replaceAll("\\\\", "_");
// remove everything else other than word, number and _
// $php_variable => php_variable
if (allowUnicodeIdentifiers) { //could be converted to a single line with ?: operator
name = Pattern.compile(removeCharRegEx, Pattern.UNICODE_CHARACTER_CLASS).matcher(name).replaceAll("");
} else {
name = name.replaceAll(removeCharRegEx, "");
}
m2.put(l, name);
return name;
// remove everything else other than word, number and _
// $php_variable => php_variable
if (allowUnicodeIdentifiers) { //could be converted to a single line with ?: operator
modifiable = Pattern.compile(sanitizeNameOptions.getRemoveCharRegEx(), Pattern.UNICODE_CHARACTER_CLASS).matcher(modifiable).replaceAll("");
} else {
modifiable = modifiable.replaceAll(sanitizeNameOptions.getRemoveCharRegEx(), "");
}
return modifiable;
});
}
private String sanitizeValue(String value, String replaceMatch, String replaceValue, ArrayList<String> exceptionList) {
private String sanitizeValue(String value, String replaceMatch, String replaceValue, List<String> exceptionList) {
if (exceptionList.size() == 0 || !exceptionList.contains(replaceMatch)) {
return value.replaceAll(replaceMatch, replaceValue);
}
@@ -5748,4 +5747,47 @@ public class DefaultCodegen implements CodegenConfig {
this.generatorMetadata = GeneratorMetadata.newBuilder(generatorMetadata)
.featureSet(builder.build()).build();
}
/**
 * Immutable key for the sanitized-name cache. It bundles the three inputs that
 * are passed to the cache loader: the raw name, the regex of characters to
 * remove, and the list of replacement patterns that must be left untouched.
 *
 * <p>Instances are used as cache keys, so their {@link #equals(Object)} and
 * {@link #hashCode()} results must never change after construction. All fields
 * are therefore final and the exception list is copied on construction.
 */
private static class SanitizeNameOptions {
    private final String name;
    private final String removeCharRegEx;
    private final List<String> exceptions;

    /**
     * Creates a new key.
     *
     * @param name            the name to be sanitized
     * @param removeCharRegEx a regex matching every character that should be removed
     * @param exceptions      patterns which should not be sanitized; {@code null} is
     *                        treated as an empty list
     */
    public SanitizeNameOptions(String name, String removeCharRegEx, List<String> exceptions) {
        this.name = name;
        this.removeCharRegEx = removeCharRegEx;
        if (exceptions != null) {
            // Snapshot the list: Collections.unmodifiableList alone is only a read-only
            // view, so later mutation of the caller's list would silently change this
            // key's equals/hashCode while it is stored in the cache.
            this.exceptions = Collections.unmodifiableList(new ArrayList<>(exceptions));
        } else {
            this.exceptions = Collections.emptyList();
        }
    }

    /**
     * @return the name to be sanitized
     */
    public String getName() {
        return name;
    }

    /**
     * @return the regex matching characters that should be removed
     */
    public String getRemoveCharRegEx() {
        return removeCharRegEx;
    }

    /**
     * @return an unmodifiable snapshot of the exception list; never {@code null}
     */
    public List<String> getExceptions() {
        return exceptions;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        SanitizeNameOptions that = (SanitizeNameOptions) o;
        return Objects.equals(getName(), that.getName()) &&
                Objects.equals(getRemoveCharRegEx(), that.getRemoveCharRegEx()) &&
                Objects.equals(getExceptions(), that.getExceptions());
    }

    @Override
    public int hashCode() {
        return Objects.hash(getName(), getRemoveCharRegEx(), getExceptions());
    }
}
}

View File

@@ -1,24 +1,39 @@
package org.openapitools.codegen.utils;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.Ticker;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.HashMap;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class StringUtils {
// A cache of camelized words. The camelize() method is invoked many times with the same
// arguments; this cache is used to optimize performance.
private static Map<Boolean, Map<String, String>> camelizedWords =
new HashMap<Boolean, Map<String, String>>();
private static Cache<Pair<String, Boolean>, String> camelizedWordsCache;
// A cache of underscored words, used to optimize the performance of the underscore() method.
private static Map<String, String> underscoreWords = new HashMap<String, String>();
private static Cache<String, String> underscoreWords;
static {
camelizedWords.put(false, new HashMap<String, String>());
camelizedWords.put(true, new HashMap<String, String>());
camelizedWordsCache = Caffeine.newBuilder()
.maximumSize(200)
.expireAfterAccess(5, TimeUnit.SECONDS)
.ticker(Ticker.systemTicker())
.build();
underscoreWords = Caffeine.newBuilder()
.maximumSize(200)
.expireAfterAccess(5, TimeUnit.SECONDS)
.ticker(Ticker.systemTicker())
.build();
}
/**
@@ -30,26 +45,24 @@ public class StringUtils {
* @return The underscored version of the word
*/
public static String underscore(final String word) {
String result = underscoreWords.get(word);
if (result != null) {
return underscoreWords.get(word, wordToUnderscore -> {
String result;
String firstPattern = "([A-Z]+)([A-Z][a-z])";
String secondPattern = "([a-z\\d])([A-Z])";
String replacementPattern = "$1_$2";
// Replace package separator with slash.
result = wordToUnderscore.replaceAll("\\.", "/");
// Replace $ with two underscores for inner classes.
result = result.replaceAll("\\$", "__");
// Replace capital letter with _ plus lowercase letter.
result = result.replaceAll(firstPattern, replacementPattern);
result = result.replaceAll(secondPattern, replacementPattern);
result = result.replace('-', '_');
// replace space with underscore
result = result.replace(' ', '_');
result = result.toLowerCase(Locale.ROOT);
return result;
}
String firstPattern = "([A-Z]+)([A-Z][a-z])";
String secondPattern = "([a-z\\d])([A-Z])";
String replacementPattern = "$1_$2";
// Replace package separator with slash.
result = word.replaceAll("\\.", "/");
// Replace $ with two underscores for inner classes.
result = result.replaceAll("\\$", "__");
// Replace capital letter with _ plus lowercase letter.
result = result.replaceAll(firstPattern, replacementPattern);
result = result.replaceAll(secondPattern, replacementPattern);
result = result.replace('-', '_');
// replace space with underscore
result = result.replace(' ', '_');
result = result.toLowerCase(Locale.ROOT);
underscoreWords.put(word, result);
return result;
});
}
/**
@@ -82,84 +95,82 @@ public class StringUtils {
/**
* Camelize name (parameter, property, method, etc)
*
* @param word string to be camelize
* @param inputWord string to be camelized
* @param lowercaseFirstLetter lower case for first letter if set to true
* @return camelized string
*/
public static String camelize(String word, boolean lowercaseFirstLetter) {
String inputWord = word;
String camelized = camelizedWords.get(lowercaseFirstLetter).get(word);
if (camelized != null) {
return camelized;
}
// Replace all slashes with dots (package separator)
Matcher m = camelizeSlashPattern.matcher(word);
while (m.find()) {
word = m.replaceFirst("." + m.group(1)/*.toUpperCase()*/); // FIXME: a parameter should not be assigned. Also declare the methods parameters as 'final'.
m = camelizeSlashPattern.matcher(word);
}
public static String camelize(final String inputWord, boolean lowercaseFirstLetter) {
Pair<String, Boolean> key = new ImmutablePair<>(inputWord, lowercaseFirstLetter);
// case out dots
String[] parts = word.split("\\.");
StringBuilder f = new StringBuilder();
for (String z : parts) {
if (z.length() > 0) {
f.append(Character.toUpperCase(z.charAt(0))).append(z.substring(1));
return camelizedWordsCache.get(key, pair -> {
String word = pair.getKey();
Boolean lowerFirstLetter = pair.getValue();
// Replace all slashes with dots (package separator)
Matcher m = camelizeSlashPattern.matcher(word);
while (m.find()) {
word = m.replaceFirst("." + m.group(1)/*.toUpperCase()*/);
m = camelizeSlashPattern.matcher(word);
}
}
word = f.toString();
m = camelizeSlashPattern.matcher(word);
while (m.find()) {
word = m.replaceFirst("" + Character.toUpperCase(m.group(1).charAt(0)) + m.group(1).substring(1)/*.toUpperCase()*/);
m = camelizeSlashPattern.matcher(word);
}
// Uppercase the class name.
m = camelizeUppercasePattern.matcher(word);
if (m.find()) {
String rep = m.group(1) + m.group(2).toUpperCase(Locale.ROOT) + m.group(3);
rep = rep.replaceAll("\\$", "\\\\\\$");
word = m.replaceAll(rep);
}
// Remove all underscores (underscore_case to camelCase)
m = camelizeUnderscorePattern.matcher(word);
while (m.find()) {
String original = m.group(2);
String upperCase = original.toUpperCase(Locale.ROOT);
if (original.equals(upperCase)) {
word = word.replaceFirst("_", "");
} else {
word = m.replaceFirst(upperCase);
// case out dots
String[] parts = word.split("\\.");
StringBuilder f = new StringBuilder();
for (String z : parts) {
if (z.length() > 0) {
f.append(Character.toUpperCase(z.charAt(0))).append(z.substring(1));
}
}
word = f.toString();
m = camelizeSlashPattern.matcher(word);
while (m.find()) {
word = m.replaceFirst("" + Character.toUpperCase(m.group(1).charAt(0)) + m.group(1).substring(1)/*.toUpperCase()*/);
m = camelizeSlashPattern.matcher(word);
}
// Uppercase the class name.
m = camelizeUppercasePattern.matcher(word);
if (m.find()) {
String rep = m.group(1) + m.group(2).toUpperCase(Locale.ROOT) + m.group(3);
rep = rep.replaceAll("\\$", "\\\\\\$");
word = m.replaceAll(rep);
}
// Remove all underscores (underscore_case to camelCase)
m = camelizeUnderscorePattern.matcher(word);
}
// Remove all hyphens (hyphen-case to camelCase)
m = camelizeHyphenPattern.matcher(word);
while (m.find()) {
word = m.replaceFirst(m.group(2).toUpperCase(Locale.ROOT));
m = camelizeHyphenPattern.matcher(word);
}
if (lowercaseFirstLetter && word.length() > 0) {
int i = 0;
char charAt = word.charAt(i);
while (i + 1 < word.length() && !((charAt >= 'a' && charAt <= 'z') || (charAt >= 'A' && charAt <= 'Z'))) {
i = i + 1;
charAt = word.charAt(i);
while (m.find()) {
String original = m.group(2);
String upperCase = original.toUpperCase(Locale.ROOT);
if (original.equals(upperCase)) {
word = word.replaceFirst("_", "");
} else {
word = m.replaceFirst(upperCase);
}
m = camelizeUnderscorePattern.matcher(word);
}
i = i + 1;
word = word.substring(0, i).toLowerCase(Locale.ROOT) + word.substring(i);
}
// remove all underscore
word = word.replaceAll("_", "");
// Remove all hyphens (hyphen-case to camelCase)
m = camelizeHyphenPattern.matcher(word);
while (m.find()) {
word = m.replaceFirst(m.group(2).toUpperCase(Locale.ROOT));
m = camelizeHyphenPattern.matcher(word);
}
// Add to the cache.
camelizedWords.get(lowercaseFirstLetter).put(inputWord, word);
return word;
if (lowerFirstLetter && word.length() > 0) {
int i = 0;
char charAt = word.charAt(i);
while (i + 1 < word.length() && !((charAt >= 'a' && charAt <= 'z') || (charAt >= 'A' && charAt <= 'Z'))) {
i = i + 1;
charAt = word.charAt(i);
}
i = i + 1;
word = word.substring(0, i).toLowerCase(Locale.ROOT) + word.substring(i);
}
// remove all underscore
word = word.replaceAll("_", "");
return word;
});
}
/**