001/* 002 * ModeShape (http://www.modeshape.org) 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.modeshape.common.text; 017 018import java.util.HashSet; 019import java.util.LinkedList; 020import java.util.Set; 021import java.util.regex.Matcher; 022import java.util.regex.Pattern; 023import org.modeshape.common.annotation.ThreadSafe; 024import org.modeshape.common.util.CheckArg; 025 026/** 027 * Transforms words to singular, plural, humanized (human readable), underscore, camel case, or ordinal form. This is inspired by 028 * the <a href="http://api.rubyonrails.org/classes/Inflector.html">Inflector</a> class in <a 029 * href="http://www.rubyonrails.org">Ruby on Rails</a>, which is distributed under the <a 030 * href="http://wiki.rubyonrails.org/rails/pages/License">Rails license</a>. 031 */ 032@ThreadSafe 033public class Inflector { 034 035 protected static final Inflector INSTANCE = new Inflector(); 036 037 public static final Inflector getInstance() { 038 return INSTANCE; 039 } 040 041 protected class Rule { 042 043 protected final String expression; 044 protected final Pattern expressionPattern; 045 protected final String replacement; 046 047 protected Rule( String expression, 048 String replacement ) { 049 this.expression = expression; 050 this.replacement = replacement != null ? replacement : ""; 051 this.expressionPattern = Pattern.compile(this.expression, Pattern.CASE_INSENSITIVE); 052 } 053 054 /** 055 * Apply the rule against the input string, returning the modified string or null if the rule didn't apply (and no 056 * modifications were made) 057 * 058 * @param input the input string 059 * @return the modified string if this rule applied, or null if the input was not modified by this rule 060 */ 061 protected String apply( String input ) { 062 Matcher matcher = this.expressionPattern.matcher(input); 063 if (!matcher.find()) return null; 064 return matcher.replaceAll(this.replacement); 065 } 066 067 @Override 068 public int hashCode() { 069 return expression.hashCode(); 070 } 071 072 @Override 073 public boolean equals( Object obj ) { 074 if (obj == this) return true; 075 if (obj != null && obj.getClass() == this.getClass()) { 076 final Rule that = (Rule)obj; 077 if (this.expression.equalsIgnoreCase(that.expression)) return true; 078 } 079 return false; 080 } 081 082 @Override 083 public String toString() { 084 return expression + ", " + replacement; 085 } 086 } 087 088 private LinkedList<Rule> plurals = new LinkedList<Rule>(); 089 private LinkedList<Rule> singulars = new LinkedList<Rule>(); 090 /** 091 * The lowercase words that are to be excluded and not processed. This map can be modified by the users via 092 * {@link #getUncountables()}. 093 */ 094 private final Set<String> uncountables = new HashSet<String>(); 095 096 public Inflector() { 097 initialize(); 098 } 099 100 protected Inflector( Inflector original ) { 101 this.plurals.addAll(original.plurals); 102 this.singulars.addAll(original.singulars); 103 this.uncountables.addAll(original.uncountables); 104 } 105 106 @Override 107 public Inflector clone() { 108 return new Inflector(this); 109 } 110 111 // ------------------------------------------------------------------------------------------------ 112 // Usage functions 113 // ------------------------------------------------------------------------------------------------ 114 115 /** 116 * Returns the plural form of the word in the string. 117 * <p> 118 * Examples: 119 * 120 * <pre> 121 * inflector.pluralize("post") #=> "posts" 122 * inflector.pluralize("octopus") #=> "octopi" 123 * inflector.pluralize("sheep") #=> "sheep" 124 * inflector.pluralize("words") #=> "words" 125 * inflector.pluralize("the blue mailman") #=> "the blue mailmen" 126 * inflector.pluralize("CamelOctopus") #=> "CamelOctopi" 127 * </pre> 128 * 129 * </p> 130 * <p> 131 * Note that if the {@link Object#toString()} is called on the supplied object, so this method works for non-strings, too. 132 * </p> 133 * 134 * @param word the word that is to be pluralized. 135 * @return the pluralized form of the word, or the word itself if it could not be pluralized 136 * @see #singularize(Object) 137 */ 138 public String pluralize( Object word ) { 139 if (word == null) return null; 140 String wordStr = word.toString().trim(); 141 if (wordStr.length() == 0) return wordStr; 142 if (isUncountable(wordStr)) return wordStr; 143 for (Rule rule : this.plurals) { 144 String result = rule.apply(wordStr); 145 if (result != null) return result; 146 } 147 return wordStr; 148 } 149 150 public String pluralize( Object word, 151 int count ) { 152 if (word == null) return null; 153 if (count == 1 || count == -1) { 154 return word.toString(); 155 } 156 return pluralize(word); 157 } 158 159 /** 160 * Returns the singular form of the word in the string. 161 * <p> 162 * Examples: 163 * 164 * <pre> 165 * inflector.singularize("posts") #=> "post" 166 * inflector.singularize("octopi") #=> "octopus" 167 * inflector.singularize("sheep") #=> "sheep" 168 * inflector.singularize("words") #=> "word" 169 * inflector.singularize("the blue mailmen") #=> "the blue mailman" 170 * inflector.singularize("CamelOctopi") #=> "CamelOctopus" 171 * </pre> 172 * 173 * </p> 174 * <p> 175 * Note that if the {@link Object#toString()} is called on the supplied object, so this method works for non-strings, too. 176 * </p> 177 * 178 * @param word the word that is to be pluralized. 179 * @return the pluralized form of the word, or the word itself if it could not be pluralized 180 * @see #pluralize(Object) 181 */ 182 public String singularize( Object word ) { 183 if (word == null) return null; 184 String wordStr = word.toString().trim(); 185 if (wordStr.length() == 0) return wordStr; 186 if (isUncountable(wordStr)) return wordStr; 187 for (Rule rule : this.singulars) { 188 String result = rule.apply(wordStr); 189 if (result != null) return result; 190 } 191 return wordStr; 192 } 193 194 /** 195 * Converts strings to lowerCamelCase. This method will also use any extra delimiter characters to identify word boundaries. 196 * <p> 197 * Examples: 198 * 199 * <pre> 200 * inflector.lowerCamelCase("active_record") #=> "activeRecord" 201 * inflector.lowerCamelCase("first_name") #=> "firstName" 202 * inflector.lowerCamelCase("name") #=> "name" 203 * inflector.lowerCamelCase("the-first_name",'-') #=> "theFirstName" 204 * </pre> 205 * 206 * </p> 207 * 208 * @param lowerCaseAndUnderscoredWord the word that is to be converted to camel case 209 * @param delimiterChars optional characters that are used to delimit word boundaries 210 * @return the lower camel case version of the word 211 * @see #underscore(String, char[]) 212 * @see #camelCase(String, boolean, char[]) 213 * @see #upperCamelCase(String, char[]) 214 */ 215 public String lowerCamelCase( String lowerCaseAndUnderscoredWord, 216 char... delimiterChars ) { 217 return camelCase(lowerCaseAndUnderscoredWord, false, delimiterChars); 218 } 219 220 /** 221 * Converts strings to UpperCamelCase. This method will also use any extra delimiter characters to identify word boundaries. 222 * <p> 223 * Examples: 224 * 225 * <pre> 226 * inflector.upperCamelCase("active_record") #=> "SctiveRecord" 227 * inflector.upperCamelCase("first_name") #=> "FirstName" 228 * inflector.upperCamelCase("name") #=> "Name" 229 * inflector.lowerCamelCase("the-first_name",'-') #=> "TheFirstName" 230 * </pre> 231 * 232 * </p> 233 * 234 * @param lowerCaseAndUnderscoredWord the word that is to be converted to camel case 235 * @param delimiterChars optional characters that are used to delimit word boundaries 236 * @return the upper camel case version of the word 237 * @see #underscore(String, char[]) 238 * @see #camelCase(String, boolean, char[]) 239 * @see #lowerCamelCase(String, char[]) 240 */ 241 public String upperCamelCase( String lowerCaseAndUnderscoredWord, 242 char... delimiterChars ) { 243 return camelCase(lowerCaseAndUnderscoredWord, true, delimiterChars); 244 } 245 246 /** 247 * By default, this method converts strings to UpperCamelCase. If the <code>uppercaseFirstLetter</code> argument to false, 248 * then this method produces lowerCamelCase. This method will also use any extra delimiter characters to identify word 249 * boundaries. 250 * <p> 251 * Examples: 252 * 253 * <pre> 254 * inflector.camelCase("active_record",false) #=> "activeRecord" 255 * inflector.camelCase("active_record",true) #=> "ActiveRecord" 256 * inflector.camelCase("first_name",false) #=> "firstName" 257 * inflector.camelCase("first_name",true) #=> "FirstName" 258 * inflector.camelCase("name",false) #=> "name" 259 * inflector.camelCase("name",true) #=> "Name" 260 * </pre> 261 * 262 * </p> 263 * 264 * @param lowerCaseAndUnderscoredWord the word that is to be converted to camel case 265 * @param uppercaseFirstLetter true if the first character is to be uppercased, or false if the first character is to be 266 * lowercased 267 * @param delimiterChars optional characters that are used to delimit word boundaries 268 * @return the camel case version of the word 269 * @see #underscore(String, char[]) 270 * @see #upperCamelCase(String, char[]) 271 * @see #lowerCamelCase(String, char[]) 272 */ 273 public String camelCase( String lowerCaseAndUnderscoredWord, 274 boolean uppercaseFirstLetter, 275 char... delimiterChars ) { 276 if (lowerCaseAndUnderscoredWord == null) return null; 277 lowerCaseAndUnderscoredWord = lowerCaseAndUnderscoredWord.trim(); 278 if (lowerCaseAndUnderscoredWord.length() == 0) return ""; 279 if (uppercaseFirstLetter) { 280 String result = lowerCaseAndUnderscoredWord; 281 // Replace any extra delimiters with underscores (before the underscores are converted in the next step)... 282 if (delimiterChars != null) { 283 for (char delimiterChar : delimiterChars) { 284 result = result.replace(delimiterChar, '_'); 285 } 286 } 287 288 // Change the case at the beginning at after each underscore ... 289 return replaceAllWithUppercase(result, "(^|_)(.)", 2); 290 } 291 if (lowerCaseAndUnderscoredWord.length() < 2) return lowerCaseAndUnderscoredWord; 292 return "" + Character.toLowerCase(lowerCaseAndUnderscoredWord.charAt(0)) 293 + camelCase(lowerCaseAndUnderscoredWord, true, delimiterChars).substring(1); 294 } 295 296 /** 297 * Makes an underscored form from the expression in the string (the reverse of the {@link #camelCase(String, boolean, char[]) 298 * camelCase} method. Also changes any characters that match the supplied delimiters into underscore. 299 * <p> 300 * Examples: 301 * 302 * <pre> 303 * inflector.underscore("activeRecord") #=> "active_record" 304 * inflector.underscore("ActiveRecord") #=> "active_record" 305 * inflector.underscore("firstName") #=> "first_name" 306 * inflector.underscore("FirstName") #=> "first_name" 307 * inflector.underscore("name") #=> "name" 308 * inflector.underscore("The.firstName") #=> "the_first_name" 309 * </pre> 310 * 311 * </p> 312 * 313 * @param camelCaseWord the camel-cased word that is to be converted; 314 * @param delimiterChars optional characters that are used to delimit word boundaries (beyond capitalization) 315 * @return a lower-cased version of the input, with separate words delimited by the underscore character. 316 */ 317 public String underscore( String camelCaseWord, 318 char... delimiterChars ) { 319 if (camelCaseWord == null) return null; 320 String result = camelCaseWord.trim(); 321 if (result.length() == 0) return ""; 322 result = result.replaceAll("([A-Z]+)([A-Z][a-z])", "$1_$2"); 323 result = result.replaceAll("([a-z\\d])([A-Z])", "$1_$2"); 324 result = result.replace('-', '_'); 325 if (delimiterChars != null) { 326 for (char delimiterChar : delimiterChars) { 327 result = result.replace(delimiterChar, '_'); 328 } 329 } 330 return result.toLowerCase(); 331 } 332 333 /** 334 * Returns a copy of the input with the first character converted to uppercase and the remainder to lowercase. 335 * 336 * @param words the word to be capitalized 337 * @return the string with the first character capitalized and the remaining characters lowercased 338 */ 339 public String capitalize( String words ) { 340 if (words == null) return null; 341 String result = words.trim(); 342 if (result.length() == 0) return ""; 343 if (result.length() == 1) return result.toUpperCase(); 344 return "" + Character.toUpperCase(result.charAt(0)) + result.substring(1).toLowerCase(); 345 } 346 347 /** 348 * Capitalizes the first word and turns underscores into spaces and strips trailing "_id" and any supplied removable tokens. 349 * Like {@link #titleCase(String, String[])}, this is meant for creating pretty output. 350 * <p> 351 * Examples: 352 * 353 * <pre> 354 * inflector.humanize("employee_salary") #=> "Employee salary" 355 * inflector.humanize("author_id") #=> "Author" 356 * </pre> 357 * 358 * </p> 359 * 360 * @param lowerCaseAndUnderscoredWords the input to be humanized 361 * @param removableTokens optional array of tokens that are to be removed 362 * @return the humanized string 363 * @see #titleCase(String, String[]) 364 */ 365 public String humanize( String lowerCaseAndUnderscoredWords, 366 String... removableTokens ) { 367 if (lowerCaseAndUnderscoredWords == null) return null; 368 String result = lowerCaseAndUnderscoredWords.trim(); 369 if (result.length() == 0) return ""; 370 // Remove a trailing "_id" token 371 result = result.replaceAll("_id$", ""); 372 // Remove all of the tokens that should be removed 373 if (removableTokens != null) { 374 for (String removableToken : removableTokens) { 375 result = result.replaceAll(removableToken, ""); 376 } 377 } 378 result = result.replaceAll("_+", " "); // replace all adjacent underscores with a single space 379 return capitalize(result); 380 } 381 382 /** 383 * Capitalizes all the words and replaces some characters in the string to create a nicer looking title. Underscores are 384 * changed to spaces, a trailing "_id" is removed, and any of the supplied tokens are removed. Like 385 * {@link #humanize(String, String[])}, this is meant for creating pretty output. 386 * <p> 387 * Examples: 388 * 389 * <pre> 390 * inflector.titleCase("man from the boondocks") #=> "Man From The Boondocks" 391 * inflector.titleCase("x-men: the last stand") #=> "X Men: The Last Stand" 392 * </pre> 393 * 394 * </p> 395 * 396 * @param words the input to be turned into title case 397 * @param removableTokens optional array of tokens that are to be removed 398 * @return the title-case version of the supplied words 399 */ 400 public String titleCase( String words, 401 String... removableTokens ) { 402 String result = humanize(words, removableTokens); 403 result = replaceAllWithUppercase(result, "\\b([a-z])", 1); // change first char of each word to uppercase 404 return result; 405 } 406 407 /** 408 * Turns a non-negative number into an ordinal string used to denote the position in an ordered sequence, such as 1st, 2nd, 409 * 3rd, 4th. 410 * 411 * @param number the non-negative number 412 * @return the string with the number and ordinal suffix 413 */ 414 public String ordinalize( int number ) { 415 int remainder = number % 100; 416 String numberStr = Integer.toString(number); 417 if (11 <= number && number <= 13) return numberStr + "th"; 418 remainder = number % 10; 419 if (remainder == 1) return numberStr + "st"; 420 if (remainder == 2) return numberStr + "nd"; 421 if (remainder == 3) return numberStr + "rd"; 422 return numberStr + "th"; 423 } 424 425 // ------------------------------------------------------------------------------------------------ 426 // Management methods 427 // ------------------------------------------------------------------------------------------------ 428 429 /** 430 * Determine whether the supplied word is considered uncountable by the {@link #pluralize(Object) pluralize} and 431 * {@link #singularize(Object) singularize} methods. 432 * 433 * @param word the word 434 * @return true if the plural and singular forms of the word are the same 435 */ 436 public boolean isUncountable( String word ) { 437 if (word == null) return false; 438 String trimmedLower = word.trim().toLowerCase(); 439 return this.uncountables.contains(trimmedLower); 440 } 441 442 /** 443 * Get the set of words that are not processed by the Inflector. The resulting map is directly modifiable. 444 * 445 * @return the set of uncountable words 446 */ 447 public Set<String> getUncountables() { 448 return uncountables; 449 } 450 451 public void addPluralize( String rule, 452 String replacement ) { 453 final Rule pluralizeRule = new Rule(rule, replacement); 454 this.plurals.addFirst(pluralizeRule); 455 } 456 457 public void addSingularize( String rule, 458 String replacement ) { 459 final Rule singularizeRule = new Rule(rule, replacement); 460 this.singulars.addFirst(singularizeRule); 461 } 462 463 public void addIrregular( String singular, 464 String plural ) { 465 CheckArg.isNotEmpty(singular, "singular rule"); 466 CheckArg.isNotEmpty(plural, "plural rule"); 467 String singularRemainder = singular.length() > 1 ? singular.substring(1) : ""; 468 String pluralRemainder = plural.length() > 1 ? plural.substring(1) : ""; 469 addPluralize("(" + singular.charAt(0) + ")" + singularRemainder + "$", "$1" + pluralRemainder); 470 addSingularize("(" + plural.charAt(0) + ")" + pluralRemainder + "$", "$1" + singularRemainder); 471 } 472 473 public void addUncountable( String... words ) { 474 if (words == null || words.length == 0) return; 475 for (String word : words) { 476 if (word != null) uncountables.add(word.trim().toLowerCase()); 477 } 478 } 479 480 /** 481 * Utility method to replace all occurrences given by the specific backreference with its uppercased form, and remove all 482 * other backreferences. 483 * <p> 484 * The Java {@link Pattern regular expression processing} does not use the preprocessing directives <code>\l</code>, 485 * <code>\u</code>, <code>\L</code>, and <code>\U</code>. If so, such directives could be used in the replacement string 486 * to uppercase or lowercase the backreferences. For example, <code>\L1</code> would lowercase the first backreference, and 487 * <code>\u3</code> would uppercase the 3rd backreference. 488 * </p> 489 * 490 * @param input 491 * @param regex 492 * @param groupNumberToUppercase 493 * @return the input string with the appropriate characters converted to upper-case 494 */ 495 protected static String replaceAllWithUppercase( String input, 496 String regex, 497 int groupNumberToUppercase ) { 498 Pattern underscoreAndDotPattern = Pattern.compile(regex); 499 Matcher matcher = underscoreAndDotPattern.matcher(input); 500 // CHECKSTYLE IGNORE check FOR NEXT 1 LINES 501 StringBuffer sb = new StringBuffer(); 502 while (matcher.find()) { 503 matcher.appendReplacement(sb, matcher.group(groupNumberToUppercase).toUpperCase()); 504 } 505 matcher.appendTail(sb); 506 return sb.toString(); 507 } 508 509 /** 510 * Completely remove all rules within this inflector. 511 */ 512 public void clear() { 513 this.uncountables.clear(); 514 this.plurals.clear(); 515 this.singulars.clear(); 516 } 517 518 protected void initialize() { 519 Inflector inflect = this; 520 inflect.addPluralize("$", "s"); 521 inflect.addPluralize("s$", "s"); 522 inflect.addPluralize("(ax|test)is$", "$1es"); 523 inflect.addPluralize("(octop|vir)us$", "$1i"); 524 inflect.addPluralize("(octop|vir)i$", "$1i"); // already plural 525 inflect.addPluralize("(alias|status)$", "$1es"); 526 inflect.addPluralize("(bu)s$", "$1ses"); 527 inflect.addPluralize("(buffal|tomat)o$", "$1oes"); 528 inflect.addPluralize("([ti])um$", "$1a"); 529 inflect.addPluralize("([ti])a$", "$1a"); // already plural 530 inflect.addPluralize("sis$", "ses"); 531 inflect.addPluralize("(?:([^f])fe|([lr])f)$", "$1$2ves"); 532 inflect.addPluralize("(hive)$", "$1s"); 533 inflect.addPluralize("([^aeiouy]|qu)y$", "$1ies"); 534 inflect.addPluralize("(x|ch|ss|sh)$", "$1es"); 535 inflect.addPluralize("(matr|vert|ind)ix|ex$", "$1ices"); 536 inflect.addPluralize("([m|l])ouse$", "$1ice"); 537 inflect.addPluralize("([m|l])ice$", "$1ice"); 538 inflect.addPluralize("^(ox)$", "$1en"); 539 inflect.addPluralize("(quiz)$", "$1zes"); 540 // Need to check for the following words that are already pluralized: 541 inflect.addPluralize("(people|men|children|sexes|moves|stadiums)$", "$1"); // irregulars 542 inflect.addPluralize("(oxen|octopi|viri|aliases|quizzes)$", "$1"); // special rules 543 544 inflect.addSingularize("s$", ""); 545 inflect.addSingularize("(s|si|u)s$", "$1s"); // '-us' and '-ss' are already singular 546 inflect.addSingularize("(n)ews$", "$1ews"); 547 inflect.addSingularize("([ti])a$", "$1um"); 548 inflect.addSingularize("((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "$1$2sis"); 549 inflect.addSingularize("(^analy)ses$", "$1sis"); 550 inflect.addSingularize("(^analy)sis$", "$1sis"); // already singular, but ends in 's' 551 inflect.addSingularize("([^f])ves$", "$1fe"); 552 inflect.addSingularize("(hive)s$", "$1"); 553 inflect.addSingularize("(tive)s$", "$1"); 554 inflect.addSingularize("([lr])ves$", "$1f"); 555 inflect.addSingularize("([^aeiouy]|qu)ies$", "$1y"); 556 inflect.addSingularize("(s)eries$", "$1eries"); 557 inflect.addSingularize("(m)ovies$", "$1ovie"); 558 inflect.addSingularize("(x|ch|ss|sh)es$", "$1"); 559 inflect.addSingularize("([m|l])ice$", "$1ouse"); 560 inflect.addSingularize("(bus)es$", "$1"); 561 inflect.addSingularize("(o)es$", "$1"); 562 inflect.addSingularize("(shoe)s$", "$1"); 563 inflect.addSingularize("(cris|ax|test)is$", "$1is"); // already singular, but ends in 's' 564 inflect.addSingularize("(cris|ax|test)es$", "$1is"); 565 inflect.addSingularize("(octop|vir)i$", "$1us"); 566 inflect.addSingularize("(octop|vir)us$", "$1us"); // already singular, but ends in 's' 567 inflect.addSingularize("(alias|status)es$", "$1"); 568 inflect.addSingularize("(alias|status)$", "$1"); // already singular, but ends in 's' 569 inflect.addSingularize("^(ox)en", "$1"); 570 inflect.addSingularize("(vert|ind)ices$", "$1ex"); 571 inflect.addSingularize("(matr)ices$", "$1ix"); 572 inflect.addSingularize("(quiz)zes$", "$1"); 573 574 inflect.addIrregular("person", "people"); 575 inflect.addIrregular("man", "men"); 576 inflect.addIrregular("child", "children"); 577 inflect.addIrregular("sex", "sexes"); 578 inflect.addIrregular("move", "moves"); 579 inflect.addIrregular("stadium", "stadiums"); 580 581 inflect.addUncountable("equipment", "information", "rice", "money", "species", "series", "fish", "sheep"); 582 } 583 584}