/*
 * ModeShape (http://www.modeshape.org)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
016package org.modeshape.common.text;
017
018import java.util.BitSet;
019import org.modeshape.common.annotation.Immutable;
020
021/**
022 * An encoder useful for converting text to be used within a filename on common file systems and operating systems, including
023 * Linux, OS X, and Windows XP. This encoder is based upon the {@link UrlEncoder}, except that it removes the '*' character from
024 * the list of safe characters.
025 * 
026 * @see UrlEncoder
027 */
028@Immutable
029public class FilenameEncoder extends UrlEncoder {
030
031    /**
032     * Data characters that are allowed in a URI but do not have a reserved purpose are called unreserved. These include upper and
033     * lower case letters, decimal digits, and a limited set of punctuation marks and symbols.
034     * 
035     * <pre>
036     * unreserved  = alphanum | mark
037     * mark        = &quot;-&quot; | &quot;_&quot; | &quot;.&quot; | &quot;!&quot; | &quot;&tilde;&quot; | &quot;'&quot; | &quot;(&quot; | &quot;)&quot;
038     * </pre>
039     * 
040     * Unreserved characters can be escaped without changing the semantics of the URI, but this should not be done unless the URI
041     * is being used in a context that does not allow the unescaped character to appear.
042     */
043    private static final BitSet SAFE_CHARACTERS = new BitSet(256);
044    private static final BitSet SAFE_WITH_SLASH_CHARACTERS;
045
046    public static final char ESCAPE_CHARACTER = '%';
047
048    static {
049        SAFE_CHARACTERS.set('a', 'z' + 1);
050        SAFE_CHARACTERS.set('A', 'Z' + 1);
051        SAFE_CHARACTERS.set('0', '9' + 1);
052        SAFE_CHARACTERS.set('-');
053        SAFE_CHARACTERS.set('_');
054        SAFE_CHARACTERS.set('.');
055        SAFE_CHARACTERS.set('!');
056        SAFE_CHARACTERS.set('~');
057        SAFE_CHARACTERS.set('\'');
058        SAFE_CHARACTERS.set('(');
059        SAFE_CHARACTERS.set(')');
060
061        SAFE_WITH_SLASH_CHARACTERS = (BitSet)SAFE_CHARACTERS.clone();
062        SAFE_WITH_SLASH_CHARACTERS.set('/');
063    }
064
065    @Override
066    public String encode( String text ) {
067        if (text == null) return null;
068        if (text.length() == 0) return text;
069        return encode(text, isSlashEncoded() ? SAFE_CHARACTERS : SAFE_WITH_SLASH_CHARACTERS);
070    }
071
072    /**
073     * Set whether this encoder should use slash encoding.
074     * 
075     * @param slashEncoded Sets slashEncoded to the specified value.
076     * @return this object, for method chaining
077     */
078    @Override
079    public FilenameEncoder setSlashEncoded( boolean slashEncoded ) {
080        super.setSlashEncoded(slashEncoded);
081        return this;
082    }
083
084}