-- |
-- Module: Filesystem.Path.Rules
-- Copyright: 2010 John Millikin
-- License: MIT
--
-- Maintainer:  jmillikin@gmail.com
-- Portability:  portable
--
module Filesystem.Path.Rules
        ( Rules
        , posix
        , posix_ghc702
        , posix_ghc704
        , windows
        , darwin
        , darwin_ghc702

        -- * Type conversions
        , toText
        , fromText
        , encode
        , decode
        , encodeString
        , decodeString

        -- * Rule‐specific path properties
        , valid
        , splitSearchPath
        , splitSearchPathString
        ) where

import           Prelude hiding (FilePath, null)
import qualified Prelude as P

import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import           Data.Char (toUpper, chr, ord)
import           Data.List (intersperse, intercalate)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import           System.IO ()

import           Filesystem.Path hiding (root, filename, basename)
import           Filesystem.Path.Internal

-------------------------------------------------------------------------------
-- POSIX
-------------------------------------------------------------------------------

-- | Linux, BSD, and other UNIX or UNIX-like operating systems.
posix :: Rules B.ByteString
posix = Rules
        { rulesName = T.pack "POSIX"
        , valid = posixValid
        , splitSearchPath = posixSplitSearch
        , splitSearchPathString = posixSplitSearch . B8.pack
        , toText = posixToText
        , fromText = posixFromText
        , encode = posixToBytes
        , decode = posixFromBytes
        , encodeString = B8.unpack . posixToBytes
        , decodeString = posixFromBytes . B8.pack
        }

-- | Linux, BSD, and other UNIX or UNIX-like operating systems.
--
-- This is a variant of 'posix' for use with GHC 7.2, which tries to decode
-- file paths in its IO computations.
--
-- Since: 0.3.3 / 0.4.2
posix_ghc702 :: Rules B.ByteString
posix_ghc702 = posix
        { rulesName = T.pack "POSIX (GHC 7.2)"
        , splitSearchPathString = posixSplitSearchString posixFromGhc702String
        , encodeString = posixToGhc702String
        , decodeString = posixFromGhc702String
        }

-- | Linux, BSD, and other UNIX or UNIX-like operating systems.
--
-- This is a variant of 'posix' for use with GHC 7.4 or later, which tries to
-- decode file paths in its IO computations.
--
-- Since: 0.3.7 / 0.4.6
posix_ghc704 :: Rules B.ByteString
posix_ghc704 = posix
        { rulesName = T.pack "POSIX (GHC 7.4)"
        , splitSearchPathString = posixSplitSearchString posixFromGhc704String
        , encodeString = posixToGhc704String
        , decodeString = posixFromGhc704String
        }

posixToText :: FilePath -> Either T.Text T.Text
posixToText p = if good then Right text else Left text where
        good = and (map snd chunks)
        text = T.concat (root : map fst chunks)

        root = rootText (pathRoot p)
        chunks = intersperse (T.pack "/", True) (map unescape (directoryChunks p))

posixFromChunks :: [Chunk] -> FilePath
posixFromChunks chunks = FilePath root directories basename exts where
        (root, pastRoot) = if P.null (head chunks)
                then (Just RootPosix, tail chunks)
                else (Nothing, chunks)

        (directories, filename)
                | P.null pastRoot = ([], "")
                | otherwise = case last pastRoot of
                        fn | fn == dot -> (goodDirs pastRoot, "")
                        fn | fn == dots -> (goodDirs pastRoot, "")
                        fn -> (goodDirs (init pastRoot), fn)

        goodDirs = filter (not . P.null)

        (basename, exts) = parseFilename filename

posixFromText :: T.Text -> FilePath
posixFromText text = if T.null text
        then empty
        else posixFromChunks (map escape (textSplitBy (== '/') text))

posixToBytes :: FilePath -> B.ByteString
posixToBytes p = B.concat (root : chunks) where
        root = B8.pack (rootChunk (pathRoot p))
        chunks = intersperse (B8.pack "/") (map chunkBytes (directoryChunks p))
        chunkBytes c = unescapeBytes' c

posixFromBytes :: B.ByteString -> FilePath
posixFromBytes bytes = if B.null bytes
        then empty
        else posixFromChunks $ flip map (B.split 0x2F bytes) $ \b -> case maybeDecodeUtf8 b of
                Just text -> escape text
                Nothing -> processInvalidUtf8 b

processInvalidUtf8 :: B.ByteString -> Chunk
processInvalidUtf8 bytes = intercalate "." textChunks where
        byteChunks = B.split 0x2E bytes
        textChunks = map unicodeDammit byteChunks
        unicodeDammit b = case maybeDecodeUtf8 b of
                Just t -> escape t
                Nothing -> map (\c -> if ord c >= 0x80
                        then chr (ord c + 0xDC00)
                        else c) (B8.unpack b)

posixToGhc702String :: FilePath -> String
posixToGhc702String p = P.concat (root : chunks) where
        root = rootChunk (pathRoot p)
        chunks = intersperse "/" (map escapeToGhc702 (directoryChunks p))

escapeToGhc702 :: Chunk -> String
escapeToGhc702 = map (\c -> if ord c >= 0xDC80 && ord c <= 0xDCFF
        then chr (ord c - 0xDC00 + 0xEF00)
        else c)

posixFromGhc702String :: String -> FilePath
posixFromGhc702String cs = if P.null cs
        then empty
        else posixFromChunks (map escapeFromGhc702 (splitBy (== '/') cs))

escapeFromGhc702 :: String -> String
escapeFromGhc702 = map (\c -> if ord c >= 0xEF80 && ord c <= 0xEFFF
        -- hopefully this isn't a valid UTF8 filename decoding to these
        -- codepoints, but there's no way to tell here.
        then chr (ord c - 0xEF00 + 0xDC00)
        else c)

posixToGhc704String :: FilePath -> String
posixToGhc704String p = P.concat (root : chunks) where
        root = rootChunk (pathRoot p)
        chunks = intersperse "/" (directoryChunks p)

posixFromGhc704String :: String -> FilePath
posixFromGhc704String cs = if P.null cs
        then empty
        else posixFromChunks (splitBy (== '/') cs)

posixValid :: FilePath -> Bool
posixValid p = validRoot && validDirectories where
        validDirectories = all validChunk (directoryChunks p)
        validChunk ch = not (any (\c -> c == '\0' || c == '/') ch)
        validRoot = case pathRoot p of
                Nothing -> True
                Just RootPosix -> True
                _ -> False

posixSplitSearch :: B.ByteString -> [FilePath]
posixSplitSearch = map (posixFromBytes . normSearch) . B.split 0x3A where
        normSearch bytes = if B.null bytes then B8.pack "." else bytes

posixSplitSearchString :: (String -> FilePath) -> String -> [FilePath]
posixSplitSearchString toPath = map (toPath . normSearch) . splitBy (== ':') where
        normSearch s = if P.null s then "." else s

-------------------------------------------------------------------------------
-- Darwin
-------------------------------------------------------------------------------

-- | Darwin and Mac OS X.
--
-- This is almost identical to 'posix', but with a native path type of 'T.Text'
-- rather than 'B.ByteString'.
--
-- Since: 0.3.4 / 0.4.3
darwin :: Rules T.Text
darwin = Rules
        { rulesName = T.pack "Darwin"
        , valid = posixValid
        , splitSearchPath = darwinSplitSearch
        , splitSearchPathString = darwinSplitSearch . TE.decodeUtf8 . B8.pack
        , toText = Right . darwinToText
        , fromText = posixFromText
        , encode = darwinToText
        , decode = posixFromText
        , encodeString = darwinToString
        , decodeString = darwinFromString
        }

-- | Darwin and Mac OS X.
--
-- This is a variant of 'darwin' for use with GHC 7.2 or later, which tries to
-- decode file paths in its IO computations.
--
-- Since: 0.3.4 / 0.4.3
darwin_ghc702 :: Rules T.Text
darwin_ghc702 = darwin
        { rulesName = T.pack "Darwin (GHC 7.2)"
        , splitSearchPathString = darwinSplitSearch . T.pack
        , encodeString = T.unpack . darwinToText
        , decodeString = posixFromText . T.pack
        }

darwinToText :: FilePath -> T.Text
darwinToText p = T.concat (root : chunks) where
        root = rootText (pathRoot p)
        chunks = intersperse (T.pack "/") (map unescape' (directoryChunks p))

darwinToString :: FilePath -> String
darwinToString = B8.unpack . TE.encodeUtf8 . darwinToText

darwinFromString :: String -> FilePath
darwinFromString = posixFromText . TE.decodeUtf8 . B8.pack

darwinSplitSearch :: T.Text -> [FilePath]
darwinSplitSearch = map (posixFromText . normSearch) . textSplitBy (== ':') where
        normSearch text = if T.null text then T.pack "." else text

-------------------------------------------------------------------------------
-- Windows
-------------------------------------------------------------------------------

-- | Windows and DOS
windows :: Rules T.Text
windows = Rules
        { rulesName = T.pack "Windows"
        , valid = winValid
        , splitSearchPath = winSplit
        , splitSearchPathString = winSplit . T.pack
        , toText = Right . winToText
        , fromText = winFromText
        , encode = winToText
        , decode = winFromText
        , encodeString = T.unpack . winToText
        , decodeString = winFromText . T.pack
        }

winToText :: FilePath -> T.Text
winToText p = case pathRoot p of
        Just RootWindowsUnc{} -> uncToText p
        _ -> dosToText p

dosToText :: FilePath -> T.Text
dosToText p = T.concat (root : chunks) where
        root = rootText (pathRoot p)
        chunks = intersperse (T.pack "\\") (map unescape' (directoryChunks p))

uncToText :: FilePath -> T.Text
uncToText p = T.concat (root : chunks) where
        root = if all T.null chunks
                then rootText (pathRoot p)
                else rootText (pathRoot p) `T.append` T.pack "\\"
        chunks = intersperse (T.pack "\\") (filter (not . T.null) (map unescape' (directoryChunks p)))

winFromText :: T.Text -> FilePath
winFromText text = if T.null text then empty else path where
        path = FilePath root directories basename exts

        -- Windows has various types of absolute paths:
        --
        -- * C:\foo\bar -> DOS-style absolute path
        -- * \\?\C:\foo\bar -> extended-length absolute path
        -- * \\host\share\foo\bar -> UNC path
        -- * \\?\UNC\host\share\foo\bar -> extended-length UNC path
        --
        -- \foo\bar looks like an absolute path, but is actually a path
        -- relative to the current DOS drive.
        --
        -- http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
        (root, pastRoot) = if T.isPrefixOf (T.pack "\\\\") text
                then case stripUncasedPrefix (T.pack "\\\\?\\UNC\\") text of
                        Just stripped -> parseUncRoot stripped True
                        Nothing -> case T.stripPrefix (T.pack "\\\\?\\") text of
                                Just stripped -> parseDosRoot stripped True
                                Nothing -> case T.stripPrefix (T.pack "\\\\") text of
                                        Just stripped -> parseUncRoot stripped False
                                        Nothing -> parseDosRoot text False
                else case T.stripPrefix (T.pack "\\??\\") text of
                        Just stripped -> parseDoubleQmark stripped
                        Nothing -> parseDosRoot text False

        (directories, filename)
                | P.null pastRoot = ([], Nothing)
                | otherwise = case last pastRoot of
                        fn | fn == T.pack "." -> (goodDirs pastRoot, Just "")
                        fn | fn == T.pack ".." -> (goodDirs pastRoot, Just "")
                        fn -> (goodDirs (init pastRoot), Just (escape fn))

        goodDirs :: [T.Text] -> [Chunk]
        goodDirs = map escape . filter (not . T.null)

        (basename, exts) = case filename of
                Just fn -> parseFilename fn
                Nothing -> (Nothing, [])

stripUncasedPrefix :: T.Text -> T.Text -> Maybe T.Text
stripUncasedPrefix prefix text = if T.toCaseFold prefix == T.toCaseFold (T.take (T.length prefix) text)
        then Just (T.drop (T.length prefix) text)
        else Nothing

parseDosRoot :: T.Text -> Bool -> (Maybe Root, [T.Text])
parseDosRoot text extended = parsed where
        split = textSplitBy (\c -> c == '/' || c == '\\') text

        head' = head split
        tail' = tail split
        parsed = if T.null head'
                then (Just RootWindowsCurrentVolume, tail')
                else if T.any (== ':') head'
                        then (Just (parseDrive head'), tail')
                                else (Nothing, split)

        parseDrive c = RootWindowsVolume (toUpper (T.head c)) extended

parseDoubleQmark :: T.Text -> (Maybe Root, [T.Text])
parseDoubleQmark text = (Just RootWindowsDoubleQMark, components) where
        components = textSplitBy (\c -> c == '/' || c == '\\') text

parseUncRoot :: T.Text -> Bool -> (Maybe Root, [T.Text])
parseUncRoot text extended = parsed where
        (host, pastHost) = T.break (== '\\') text
        (share, pastShare) = T.break (== '\\') (T.drop 1 pastHost)
        split = if T.null pastShare
                then []
                else textSplitBy (== '\\') pastShare
        parsed = (Just (RootWindowsUnc (T.unpack host) (T.unpack share) extended), split)

winValid :: FilePath -> Bool
winValid p = case pathRoot p of
        Nothing -> dosValid p
        Just RootWindowsCurrentVolume -> dosValid p
        Just (RootWindowsVolume v _) -> elem v ['A'..'Z'] && dosValid p
        Just (RootWindowsUnc host share _) -> uncValid p host share
        -- don't even try to validate \??\ paths
        Just RootWindowsDoubleQMark -> True
        Just RootPosix -> False

dosValid :: FilePath -> Bool
dosValid p = noReserved && validCharacters where
        reservedChars = map chr [0..0x1F] ++ "/\\?*:|\"<>"
        reservedNames =
                [ "AUX", "CLOCK$", "COM1", "COM2", "COM3", "COM4"
                , "COM5", "COM6", "COM7", "COM8", "COM9", "CON"
                , "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6"
                , "LPT7", "LPT8", "LPT9", "NUL", "PRN"
                ]

        noExt = p { pathExtensions = [] }
        noReserved = flip all (directoryChunks noExt)
                $ \fn -> notElem (map toUpper fn) reservedNames

        validCharacters = flip all (directoryChunks p)
                $ not . any (`elem` reservedChars)

uncValid :: FilePath -> String -> String -> Bool
uncValid _ "" _ = False
uncValid _ _ "" = False
uncValid p host share = ok host && ok share && all ok (dropWhileEnd P.null (directoryChunks p)) where
        ok ""  = False
        ok c = not (any invalidChar c)
        invalidChar c = c == '\x00' || c == '\\'

dropWhileEnd :: (a -> Bool) -> [a] -> [a]
dropWhileEnd p = foldr (\x xs -> if p x && P.null xs then [] else x : xs) []

winSplit :: T.Text -> [FilePath]
winSplit = map winFromText . filter (not . T.null) . textSplitBy (== ';')