001 /*
002 * This file is part of muCommander, http://www.mucommander.com
003 * Copyright (C) 2002-2008 Maxence Bernard
004 *
005 * muCommander is free software; you can redistribute it and/or modify
006 * it under the terms of the GNU General Public License as published by
007 * the Free Software Foundation; either version 3 of the License, or
008 * (at your option) any later version.
009 *
010 * muCommander is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013 * GNU General Public License for more details.
014 *
015 * You should have received a copy of the GNU General Public License
016 * along with this program. If not, see <http://www.gnu.org/licenses/>.
017 */
018
019 package com.mucommander.io;
020
021 import java.io.IOException;
022 import java.io.InputStream;
023
024 /**
025 * This class provides methods to determine whether some data is binary data or text data.
026 * As there is no formal characterization of what binary data really is, this method is an approximation at best
027 * and should not be trusted for anything critical.
028 *
029 * <p>The {@link #RECOMMENDED_BYTE_SIZE} field indicates how many bytes should be provided for the detector to be
030 * confident enough.</p>
031 *
032 * @see com.mucommander.io.EncodingDetector
033 * @author Maxence Bernard
034 */
035 public class BinaryDetector {
036
037 /** Provides an indication as to the number of bytes that should fed to the detector for it to have enough
038 * confidence. */
039 public final static int RECOMMENDED_BYTE_SIZE = 1024;
040
041
042 /**
043 * This method is a shorthand for {@link #guessBinary(byte[], int, int) guessBinary(b, 0, b.length)}.
044 *
045 * @param b the data to analyze
046 * @return true if BinaryDetector thinks that the specified data is binary
047 */
048 public static boolean guessBinary(byte b[]) {
049 return guessBinary(b, 0, b.length);
050 }
051
052 /**
053 * Tries and detect whether the given bytes correspond to binary or text data. The specified bytes can typically
054 * be the beginning of a file.</br>
055 * This method returns <code>true</code> if it thinks that the bytes correspond to binary data.
056 *
057 * @param b the data to analyze
058 * @param off specifies where to start reading the array
059 * @param len specifies where to stop reading the array
060 * @return true if BinaryDetector thinks that the specified data is binary
061 */
062 public static boolean guessBinary(byte b[], int off, int len) {
063 // Returns true if any of the bytes are the NUL character. The NUL character is usually never found in a text
064 // file, no matter what character encoding is used.
065 int end = off+len;
066 for(int i=off; i<end; i++) {
067 if(b[i]==0x00)
068 return true;
069 }
070
071 return false;
072 }
073
074 /**
075 * Tries and detect whether the given stream contains binary or text data.</br>
076 * This method returns <code>true</code> if it thinks that the bytes correspond to binary data.
077 *
078 * <p>A maximum of {@link #RECOMMENDED_BYTE_SIZE} will be read from the <code>InputStream</code>. The
079 * stream will not be closed and will not be repositionned after the bytes have been read. It is up to the calling
080 * method to use the <code>InputStream#mark()</code> and <code>InputStream#reset()</code> methods (if supported)
081 * or reopen the stream if needed.
082 * </p>
083 *
084 * @param in the stream to analyze
085 * @return true if BinaryDetector thinks that the specified data is binary
086 * @throws IOException if an error occurred while reading the InputStream.
087 */
088 public static boolean guessBinary(InputStream in) throws IOException {
089 byte[] bytes = new byte[RECOMMENDED_BYTE_SIZE];
090 return guessBinary(bytes, 0, StreamUtils.readUpTo(in, bytes));
091 }
092 }