-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Reading, writing and manipulating ".tar" archive files.
--   
--   This library is for working with "<tt>.tar</tt>" archive files. It can
--   read and write a range of common variations of archive format
--   including V7, USTAR, POSIX and GNU formats. It provides support for
--   packing and unpacking portable archives. This makes it suitable for
--   distribution but not backup because details like file ownership and
--   exact permissions are not preserved.
@package tar
@version 0.4.0.1


-- | Perform various checks on tar file entries.
module Codec.Archive.Tar.Check

-- | This function checks a sequence of tar entries for file name security
--   problems. It checks that:
--   
--   <ul>
--   <li>file paths are not absolute</li>
--   <li>file paths do not contain any path components that are
--   "<tt>..</tt>"</li>
--   <li>file names are valid</li>
--   </ul>
--   
--   These checks are from the perspective of the current OS. That means we
--   check for "<tt>C:blah</tt>" files on Windows and "/blah" files on
--   Unix. For archive entry types <a>HardLink</a> and <a>SymbolicLink</a>
--   the same checks are done for the link target. A failure in any entry
--   terminates the sequence of entries with an error.
checkSecurity :: Entries e -> Entries (Either e FileNameError)

-- | Errors arising from tar file names being in some way invalid or
--   dangerous
data FileNameError
InvalidFileName :: FilePath -> FileNameError
AbsoluteFileName :: FilePath -> FileNameError

-- | This function checks a sequence of tar entries for being a "tar bomb".
--   This means that the tar file does not follow the standard convention
--   that all entries are within a single subdirectory, e.g. a file
--   "foo.tar" would usually have all entries within the "foo/"
--   subdirectory.
--   
--   Given the expected subdirectory, this function checks all entries are
--   within that subdirectory.
--   
--   Note: This check must be used in conjunction with
--   <a>checkSecurity</a>.
checkTarbomb :: FilePath -> Entries e -> Entries (Either e TarBombError)

-- | An error that occurs if a tar file is a "tar bomb" that would extract
--   files outside of the intended directory.
data TarBombError
TarBombError :: FilePath -> TarBombError

-- | This function checks a sequence of tar entries for a number of
--   portability issues. It will complain if:
--   
--   <ul>
--   <li>The old "Unix V7" or "gnu" formats are used. For maximum
--   portability only the POSIX standard "ustar" format should be
--   used.</li>
--   <li>A non-portable entry type is used. Only ordinary files, hard
--   links, symlinks and directories are portable. Device files, pipes and
--   others are not portable between all common operating systems.</li>
--   <li>Non-ASCII characters are used in file names. There is no agreed
--   portable convention for Unicode or other extended character sets in
--   file names in tar archives.</li>
--   <li>File names that would not be portable to both Unix and Windows.
--   This check includes characters that are valid in both systems and the
--   '/' vs '\' directory separator conventions.</li>
--   </ul>
checkPortability :: Entries e -> Entries (Either e PortabilityError)

-- | Potential portability issues in a tar archive
data PortabilityError
NonPortableFormat :: Format -> PortabilityError
NonPortableFileType :: PortabilityError
NonPortableEntryNameChar :: FilePath -> PortabilityError
NonPortableFileName :: PortabilityPlatform -> FileNameError -> PortabilityError

-- | The name of a platform that portability issues arise from
type PortabilityPlatform = String
instance Typeable FileNameError
instance Typeable TarBombError
instance Typeable PortabilityError
instance Show PortabilityError
instance Exception PortabilityError
instance Show TarBombError
instance Exception TarBombError
instance Exception FileNameError
instance Show FileNameError


-- | Types and functions to manipulate tar entries.
--   
--   While the <a>Codec.Archive.Tar</a> module provides only the simple
--   high level API, this module provides full access to the details of tar
--   entries. This lets you inspect all the meta-data, construct entries
--   and handle error cases more precisely.
--   
--   This module uses common names and so is designed to be imported
--   qualified:
--   
--   <pre>
--   import qualified Codec.Archive.Tar       as Tar
--   import qualified Codec.Archive.Tar.Entry as Tar
--   </pre>
module Codec.Archive.Tar.Entry

-- | Tar archive entry.
data Entry
Entry :: !TarPath -> !EntryContent -> !Permissions -> !Ownership -> !EpochTime -> !Format -> Entry

-- | The path of the file or directory within the archive. This is in a
--   tar-specific form. Use <a>entryPath</a> to get a native
--   <a>FilePath</a>.
entryTarPath :: Entry -> !TarPath

-- | The real content of the entry. For <a>NormalFile</a> this includes the
--   file data. An entry usually contains a <a>NormalFile</a> or a
--   <a>Directory</a>.
entryContent :: Entry -> !EntryContent

-- | File permissions (Unix style file mode).
entryPermissions :: Entry -> !Permissions

-- | The user and group to which this file belongs.
entryOwnership :: Entry -> !Ownership

-- | The time the file was last modified.
entryTime :: Entry -> !EpochTime

-- | The tar format the archive is using.
entryFormat :: Entry -> !Format

-- | Native <a>FilePath</a> of the file or directory within the archive.
entryPath :: Entry -> FilePath

-- | The content of a tar archive entry, which depends on the type of
--   entry.
--   
--   Portable archives should contain only <a>NormalFile</a> and
--   <a>Directory</a>.
data EntryContent
NormalFile :: ByteString -> !FileSize -> EntryContent
Directory :: EntryContent
SymbolicLink :: !LinkTarget -> EntryContent
HardLink :: !LinkTarget -> EntryContent
CharacterDevice :: !DevMajor -> !DevMinor -> EntryContent
BlockDevice :: !DevMajor -> !DevMinor -> EntryContent
NamedPipe :: EntryContent
OtherEntryType :: !TypeCode -> ByteString -> !FileSize -> EntryContent
data Ownership
Ownership :: String -> String -> !Int -> !Int -> Ownership

-- | The owner user name. Should be set to <tt>""</tt> if unknown.
ownerName :: Ownership -> String

-- | The owner group name. Should be set to <tt>""</tt> if unknown.
groupName :: Ownership -> String

-- | Numeric owner user id. Should be set to <tt>0</tt> if unknown.
ownerId :: Ownership -> !Int

-- | Numeric owner group id. Should be set to <tt>0</tt> if unknown.
groupId :: Ownership -> !Int
type FileSize = Int64
type Permissions = FileMode

-- | The number of seconds since the UNIX epoch
type EpochTime = Int64
type DevMajor = Int
type DevMinor = Int
type TypeCode = Char

-- | There have been a number of extensions to the tar file format over the
--   years. They all share the basic entry fields and put more meta-data in
--   different extended headers.
data Format

-- | This is the classic Unix V7 tar format. It does not support owner and
--   group names, just numeric Ids. It also does not support device
--   numbers.
V7Format :: Format

-- | The "USTAR" format is an extension of the classic V7 format. It was
--   later standardised by POSIX. It has some restrictions but is the most
--   portable format.
UstarFormat :: Format

-- | The GNU tar implementation also extends the classic V7 format, though
--   in a slightly different way from the USTAR format. In general for new
--   archives the standard USTAR/POSIX should be used.
GnuFormat :: Format

-- | An <a>Entry</a> with all default values except for the file name and
--   type. It uses the portable USTAR/POSIX format (see
--   <a>UstarFormat</a>).
--   
--   You can use this as a basis and override specific fields, eg:
--   
--   <pre>
--   (simpleEntry name (HardLink target)) { entryTime = mtime }
--   </pre>
simpleEntry :: TarPath -> EntryContent -> Entry

-- | A tar <a>Entry</a> for a file.
--   
--   Entry fields such as file permissions and ownership have default
--   values.
--   
--   You can use this as a basis and override specific fields. For example
--   if you need an executable file you could use:
--   
--   <pre>
--   (fileEntry name content) { entryPermissions = executableFilePermissions }
--   </pre>
fileEntry :: TarPath -> ByteString -> Entry

-- | A tar <a>Entry</a> for a directory.
--   
--   Entry fields such as file permissions and ownership have default
--   values.
directoryEntry :: TarPath -> Entry

-- | <tt>rw-r--r--</tt> for normal files
ordinaryFilePermissions :: Permissions

-- | <tt>rwxr-xr-x</tt> for executable files
executableFilePermissions :: Permissions

-- | <tt>rwxr-xr-x</tt> for directories
directoryPermissions :: Permissions

-- | Construct a tar <a>Entry</a> based on a local file.
--   
--   This sets the entry size, the data contained in the file and the
--   file's modification time. If the file is executable then that
--   information is also preserved. File ownership and detailed permissions
--   are not preserved.
--   
--   <ul>
--   <li>The file contents are read lazily.</li>
--   </ul>
packFileEntry :: FilePath -> TarPath -> IO Entry

-- | Construct a tar <a>Entry</a> based on a local directory (but not its
--   contents).
--   
--   The only attribute of the directory that is used is its modification
--   time. Directory ownership and detailed permissions are not preserved.
packDirectoryEntry :: FilePath -> TarPath -> IO Entry

-- | This is a utility function, much like <a>getDirectoryContents</a>. The
--   difference is that it includes the contents of subdirectories.
--   
--   The paths returned are all relative to the top directory. Directory
--   paths are distinguishable by having a trailing path separator (see
--   <a>hasTrailingPathSeparator</a>).
--   
--   All directories are listed before the files that they contain. Amongst
--   the contents of a directory, subdirectories are listed after normal
--   files. The overall result is that files within a directory will be
--   together in a single contiguous group. This tends to improve file
--   layout and IO performance when creating or extracting tar archives.
--   
--   <ul>
--   <li>This function returns results lazily. Subdirectories are not
--   scanned until the file entries in the parent directory have been
--   consumed.</li>
--   </ul>
getDirectoryContentsRecursive :: FilePath -> IO [FilePath]

-- | The classic tar format allowed just 100 characters for the file name.
--   The USTAR format extended this with an extra 155 characters, however
--   it uses a complex method of splitting the name between the two
--   sections.
--   
--   Instead of just putting any overflow into the extended area, it uses
--   the extended area as a prefix. The aggravating insane bit however is
--   that the prefix (if any) must only contain a directory prefix. That is
--   the split between the two areas must be on a directory separator
--   boundary. So there is no simple calculation to work out if a file name
--   is too long. Instead we have to try to find a valid split that makes
--   the name fit in the two areas.
--   
--   The rationale presumably was to make it a bit more compatible with old
--   tar programs that only understand the classic format. A classic tar
--   would be able to extract the file name and possibly some dir prefix,
--   but not the full dir prefix. So the files would end up in the wrong
--   place, but that's probably better than ending up with the wrong names
--   too.
--   
--   So it's understandable but rather annoying.
--   
--   <ul>
--   <li>Tar paths use Posix format (ie <tt>'/'</tt> directory separators),
--   irrespective of the local path conventions.</li>
--   <li>The directory separator between the prefix and name is <i>not</i>
--   stored.</li>
--   </ul>
data TarPath

-- | Convert a native <a>FilePath</a> to a <a>TarPath</a>.
--   
--   The conversion may fail if the <a>FilePath</a> is too long. See
--   <a>TarPath</a> for a description of the problem with splitting long
--   <a>FilePath</a>s.
toTarPath :: Bool -> FilePath -> Either String TarPath

-- | Convert a <a>TarPath</a> to a native <a>FilePath</a>.
--   
--   The native <a>FilePath</a> will use the native directory separator but
--   it is not otherwise checked for validity or sanity. In particular:
--   
--   <ul>
--   <li>The tar path may be invalid as a native path, eg the file name
--   <tt>"nul"</tt> is not valid on Windows.</li>
--   <li>The tar path may be an absolute path or may contain <tt>".."</tt>
--   components. For security reasons this should not usually be allowed,
--   but it is your responsibility to check for these conditions (eg using
--   <tt>checkSecurity</tt>).</li>
--   </ul>
fromTarPath :: TarPath -> FilePath

-- | Convert a <a>TarPath</a> to a Unix/Posix <a>FilePath</a>.
--   
--   The difference compared to <a>fromTarPath</a> is that it always
--   returns a Unix style path irrespective of the current operating
--   system.
--   
--   This is useful to check how a <a>TarPath</a> would be interpreted on a
--   specific operating system, eg to perform portability checks.
fromTarPathToPosixPath :: TarPath -> FilePath

-- | Convert a <a>TarPath</a> to a Windows <a>FilePath</a>.
--   
--   The only difference compared to <a>fromTarPath</a> is that it always
--   returns a Windows style path irrespective of the current operating
--   system.
--   
--   This is useful to check how a <a>TarPath</a> would be interpreted on a
--   specific operating system, eg to perform portability checks.
fromTarPathToWindowsPath :: TarPath -> FilePath

-- | The tar format allows just 100 ASCII characters for the link targets
--   of the <a>SymbolicLink</a> and <a>HardLink</a> entry types.
data LinkTarget

-- | Convert a native <a>FilePath</a> to a tar <a>LinkTarget</a>. This may
--   fail if the string is longer than 100 characters or if it contains
--   non-portable characters.
toLinkTarget :: FilePath -> Maybe LinkTarget

-- | Convert a tar <a>LinkTarget</a> to a native <a>FilePath</a>.
fromLinkTarget :: LinkTarget -> FilePath

-- | Convert a tar <a>LinkTarget</a> to a Unix/Posix <a>FilePath</a>.
fromLinkTargetToPosixPath :: LinkTarget -> FilePath

-- | Convert a tar <a>LinkTarget</a> to a Windows <a>FilePath</a>.
fromLinkTargetToWindowsPath :: LinkTarget -> FilePath


-- | Reading, writing and manipulating "<tt>.tar</tt>" archive files.
--   
--   This module uses common names and so is designed to be imported
--   qualified:
--   
--   <pre>
--   import qualified Codec.Archive.Tar as Tar
--   </pre>
module Codec.Archive.Tar

-- | Create a new <tt>".tar"</tt> file from a directory of files.
--   
--   It is equivalent to calling the standard <tt>tar</tt> program like so:
--   
--   <pre>
--   $ tar -f tarball.tar -C base -c dir
--   </pre>
--   
--   This assumes a directory <tt>./base/dir</tt> with files inside, eg
--   <tt>./base/dir/foo.txt</tt>. The file names inside the resulting tar
--   file will be relative to <tt>base</tt>, eg <tt>dir/foo.txt</tt>.
--   
--   This is a high level "all in one" operation. Since you may need
--   variations on this function it is instructive to see how it is
--   written. It is just:
--   
--   <pre>
--   BS.writeFile tar . Tar.write =&lt;&lt; Tar.pack base paths
--   </pre>
--   
--   Notes:
--   
--   The files and directories must not change during this operation or the
--   result is not well defined.
--   
--   The intention of this function is to create tarballs that are portable
--   between systems. It is <i>not</i> suitable for doing file system
--   backups because file ownership and permissions are not fully
--   preserved. File ownership is not preserved at all. File permissions
--   are set to simple portable values:
--   
--   <ul>
--   <li><tt>rw-r--r--</tt> for normal files</li>
--   <li><tt>rwxr-xr-x</tt> for executable files</li>
--   <li><tt>rwxr-xr-x</tt> for directories</li>
--   </ul>
create :: FilePath -> FilePath -> [FilePath] -> IO ()

-- | Extract all the files contained in a <tt>".tar"</tt> file.
--   
--   It is equivalent to calling the standard <tt>tar</tt> program like so:
--   
--   <pre>
--   $ tar -x -f tarball.tar -C dir
--   </pre>
--   
--   So for example if the <tt>tarball.tar</tt> file contains
--   <tt>foo/bar.txt</tt> then this will extract it to
--   <tt>dir/foo/bar.txt</tt>.
--   
--   This is a high level "all in one" operation. Since you may need
--   variations on this function it is instructive to see how it is
--   written. It is just:
--   
--   <pre>
--   Tar.unpack dir . Tar.read =&lt;&lt; BS.readFile tar
--   </pre>
--   
--   Notes:
--   
--   Extracting can fail for a number of reasons. The tarball may be
--   incorrectly formatted. There may be IO or permission errors. In such
--   cases an exception will be thrown and extraction will not continue.
--   
--   Since the extraction may fail part way through it is not atomic. For
--   this reason you may want to extract into an empty directory and, if
--   the extraction fails, recursively delete the directory.
--   
--   Security: only files inside the target directory will be written.
--   Tarballs containing entries that point outside of the target directory
--   (either absolute paths or relative paths containing "<tt>..</tt>")
--   will be caught and an exception will be thrown.
extract :: FilePath -> FilePath -> IO ()

-- | Convert a data stream in the tar file format into an internal data
--   structure. Decoding errors are reported by the <a>Fail</a> constructor
--   of the <a>Entries</a> type.
--   
--   <ul>
--   <li>The conversion is done lazily.</li>
--   </ul>
read :: ByteString -> Entries FormatError

-- | Create the external representation of a tar archive by serialising a
--   list of tar entries.
--   
--   <ul>
--   <li>The conversion is done lazily.</li>
--   </ul>
write :: [Entry] -> ByteString

-- | Creates a tar archive from a list of directories or files. Any
--   directories specified will have their contents included recursively.
--   Paths in the archive will be relative to the given base directory.
--   
--   This is a portable implementation of packing suitable for portable
--   archives. In particular it only constructs <a>NormalFile</a> and
--   <a>Directory</a> entries. Hard links and symbolic links are treated
--   like ordinary files. It cannot be used to pack directories containing
--   recursive symbolic links. Special files like FIFOs (named pipes),
--   sockets or device files will also cause problems.
--   
--   An exception will be thrown for any file names that are too long to
--   represent as a <a>TarPath</a>.
--   
--   <ul>
--   <li>This function returns results lazily. Subdirectories are scanned
--   and files are read one by one as the list of entries is consumed.</li>
--   </ul>
pack :: FilePath -> [FilePath] -> IO [Entry]

-- | Create local files and directories based on the entries of a tar
--   archive.
--   
--   This is a portable implementation of unpacking suitable for portable
--   archives. It handles <a>NormalFile</a> and <a>Directory</a> entries
--   and has simulated support for <a>SymbolicLink</a> and <a>HardLink</a>
--   entries. Links are implemented by copying the target file. This
--   therefore works on Windows as well as Unix. All other entry types are
--   ignored, that is they are not unpacked and no exception is raised.
--   
--   If the <a>Entries</a> ends in an error then it is raised as an
--   exception. Any files or directories that have been unpacked before the
--   error was encountered will not be deleted. For this reason you may
--   want to unpack into an empty directory so that you can easily clean up
--   if unpacking fails part-way.
--   
--   On its own, this function only checks for security (using
--   <a>checkSecurity</a>). You can do other checks by applying checking
--   functions to the <a>Entries</a> that you pass to this function. For
--   example:
--   
--   <pre>
--   unpack dir (checkTarbomb expectedDir entries)
--   </pre>
--   
--   If you care about the priority of the reported errors then you may
--   want to use <a>checkSecurity</a> before <a>checkTarbomb</a> or other
--   checks.
unpack :: Exception e => FilePath -> Entries e -> IO ()

-- | Tar archive entry.
data Entry

-- | Native <a>FilePath</a> of the file or directory within the archive.
entryPath :: Entry -> FilePath

-- | The real content of the entry. For <a>NormalFile</a> this includes the
--   file data. An entry usually contains a <a>NormalFile</a> or a
--   <a>Directory</a>.
entryContent :: Entry -> EntryContent

-- | The content of a tar archive entry, which depends on the type of
--   entry.
--   
--   Portable archives should contain only <a>NormalFile</a> and
--   <a>Directory</a>.
data EntryContent
NormalFile :: ByteString -> !FileSize -> EntryContent
Directory :: EntryContent
SymbolicLink :: !LinkTarget -> EntryContent
HardLink :: !LinkTarget -> EntryContent
CharacterDevice :: !DevMajor -> !DevMinor -> EntryContent
BlockDevice :: !DevMajor -> !DevMinor -> EntryContent
NamedPipe :: EntryContent
OtherEntryType :: !TypeCode -> ByteString -> !FileSize -> EntryContent

-- | A tar archive is a sequence of entries.
--   
--   The point of this type as opposed to just using a list is that it
--   makes the failure case explicit. We need this because the sequence of
--   entries we get from reading a tarball can include errors.
--   
--   It is a concrete data type so you can manipulate it directly but it is
--   often clearer to use the provided functions for mapping, folding and
--   unfolding.
--   
--   Converting from a list can be done with just <tt>foldr Next Done</tt>.
--   Converting back into a list can be done with <a>foldEntries</a>
--   however in that case you must be prepared to handle the <a>Fail</a>
--   case inherent in the <a>Entries</a> type.
--   
--   The <a>Monoid</a> instance lets you concatenate archives or append
--   entries to an archive.
data Entries e
Next :: Entry -> (Entries e) -> Entries e
Done :: Entries e
Fail :: e -> Entries e

-- | This is like the standard <a>map</a> function on lists, but for
--   <a>Entries</a>. It includes failure as an extra possible outcome of the
--   mapping function.
--   
--   If your mapping function cannot fail it may be more convenient to use
--   <a>mapEntriesNoFail</a>
mapEntries :: (Entry -> Either e' Entry) -> Entries e -> Entries (Either e e')

-- | Like <a>mapEntries</a> but the mapping function itself cannot fail.
mapEntriesNoFail :: (Entry -> Entry) -> Entries e -> Entries e

-- | This is like the standard <a>foldr</a> function on lists, but for
--   <a>Entries</a>. Compared to <a>foldr</a> it takes an extra function to
--   account for the possibility of failure.
--   
--   This is used to consume a sequence of entries. For example it could be
--   used to scan a tarball for problems or to collect an index of the
--   contents.
foldEntries :: (Entry -> a -> a) -> a -> (e -> a) -> Entries e -> a

-- | This is like the standard <tt>unfoldr</tt> function on lists, but for
--   <a>Entries</a>. It includes failure as an extra possibility that the
--   stepper function may return.
--   
--   It can be used to generate <a>Entries</a> from some other type. For
--   example it is used internally to lazily unfold entries from a
--   <a>ByteString</a>.
unfoldEntries :: (a -> Either e (Maybe (Entry, a))) -> a -> Entries e

-- | Errors that can be encountered when parsing a Tar archive.
data FormatError
TruncatedArchive :: FormatError
ShortTrailer :: FormatError
BadTrailer :: FormatError
TrailingJunk :: FormatError
ChecksumIncorrect :: FormatError
NotTarFormat :: FormatError
UnrecognisedTarFormat :: FormatError
HeaderBadNumericEncoding :: FormatError
