001    /*
002     * This file is part of muCommander, http://www.mucommander.com
003     * Copyright (C) 2002-2008 Maxence Bernard
004     *
005     * muCommander is free software; you can redistribute it and/or modify
006     * it under the terms of the GNU General Public License as published by
007     * the Free Software Foundation; either version 3 of the License, or
008     * (at your option) any later version.
009     *
010     * muCommander is distributed in the hope that it will be useful,
011     * but WITHOUT ANY WARRANTY; without even the implied warranty of
012     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013     * GNU General Public License for more details.
014     *
015     * You should have received a copy of the GNU General Public License
016     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
017     */
018    
019    package com.mucommander.io;
020    
021    import java.io.IOException;
022    import java.io.InputStream;
023    
024    /**
025     * This class provides methods to determine whether some data is binary data or text data.
026     * As there is no formal characterization of what binary data really is, this method is an approximation at best
027     * and should not be trusted for anything critical.
028     *
029     * <p>The {@link #RECOMMENDED_BYTE_SIZE} field indicates how many bytes should be provided for the detector to be
030     * confident enough.</p>
031     *
032     * @see com.mucommander.io.EncodingDetector
033     * @author Maxence Bernard
034     */
035    public class BinaryDetector {
036    
037        /** Provides an indication as to the number of bytes that should fed to the detector for it to have enough
038         * confidence. */
039        public final static int RECOMMENDED_BYTE_SIZE = 1024;
040    
041    
042        /**
043         * This method is a shorthand for {@link #guessBinary(byte[], int, int) guessBinary(b, 0, b.length)}.
044         *
045         * @param b the data to analyze
046         * @return true if BinaryDetector thinks that the specified data is binary
047         */
048        public static boolean guessBinary(byte b[]) {
049            return guessBinary(b, 0, b.length);
050        }
051    
052        /**
053         * Tries and detect whether the given bytes correspond to binary or text data. The specified bytes can typically
054         * be the beginning of a file.</br>
055         * This method returns <code>true</code> if it thinks that the bytes correspond to binary data.
056         *
057         * @param b the data to analyze
058         * @param off specifies where to start reading the array
059         * @param len specifies where to stop reading the array
060         * @return true if BinaryDetector thinks that the specified data is binary
061         */
062        public static boolean guessBinary(byte b[], int off, int len) {
063            // Returns true if any of the bytes are the NUL character. The NUL character is usually never found in a text
064            // file, no matter what character encoding is used.
065            int end = off+len;
066            for(int i=off; i<end; i++) {
067                if(b[i]==0x00)
068                    return true;
069            }
070    
071            return false;
072        }
073    
074        /**
075         * Tries and detect whether the given stream contains binary or text data.</br>
076         * This method returns <code>true</code> if it thinks that the bytes correspond to binary data.
077         *
078         * <p>A maximum of {@link #RECOMMENDED_BYTE_SIZE} will be read from the <code>InputStream</code>. The
079         * stream will not be closed and will not be repositionned after the bytes have been read. It is up to the calling
080         * method to use the <code>InputStream#mark()</code> and <code>InputStream#reset()</code> methods (if supported)
081         * or reopen the stream if needed.
082         * </p>
083         *
084         * @param in the stream to analyze
085         * @return true if BinaryDetector thinks that the specified data is binary
086         * @throws IOException if an error occurred while reading the InputStream.
087         */
088        public static boolean guessBinary(InputStream in) throws IOException {
089            byte[] bytes = new byte[RECOMMENDED_BYTE_SIZE];
090            return guessBinary(bytes, 0, StreamUtils.readUpTo(in, bytes));
091        }
092    }