module ParseHtml (printDef, printDifferentDef, filterSupTags, findRelevantTags) where import Text.HTML.TagSoup import Prelude import WordDef import qualified Data.List as L import qualified Data.Text.Lazy as T printDifferentDef :: T.Text -> IO () printDifferentDef x = putStrLn (getDifferentDef x) getDifferentDef :: T.Text -> String getDifferentDef x = getDifferentDefMessages (length xs) ++ renderHeaderList xs where xs = (parseDefTags . differentDefTags) x getDifferentDefMessages :: Int -> String getDifferentDefMessages 0 = "Aucune définition disponible." getDifferentDefMessages 1 = "Une définition disponible : \n" getDifferentDefMessages _ = "Plusieurs définitions disponibles : \n" parseDefTags :: [Tag String] -> [WordDefHeader] parseDefTags [] = [] parseDefTags xs = (WordDefHeader (WordName (renderTags (take 1 (fst st)))) (WordType (renderTags [(fst st) !! 1]))) : (parseDefTags (snd st)) where st = subTags xs subTags :: [Tag String] -> ([Tag String],[Tag String]) subTags [] = ([],[]) subTags xs = if (length xs) `mod` 3 /= 0 then splitAt 2 xs else ((take 1 xs) ++ (take 1 (drop 2 xs)) , drop 3 xs) differentDefTags :: T.Text -> [Tag String] differentDefTags = filter (~== TagText "") . (takeWhile (~/= "
")) . (dropWhile (~/= "
")) . parseTags . T.unpack printDef :: T.Text -> Int -> IO () printDef t n = putStrLn (getDef t n) getDef :: T.Text -> Int -> String getDef t n = renderFullDef (getFullDef t n) getFullDef :: T.Text -> Int -> WordFullDef getFullDef t n = WordFullDef header defList where header = ((parseDefTags . differentDefTags) t !! (n-1)) defList = map (WordDefSentence . renderTags) [parseFullDefTags t] parseFullDefTags :: T.Text -> [Tag String] parseFullDefTags = findRelevantTags . parseTags . T.unpack findRelevantTags :: [Tag String] -> [Tag String] findRelevantTags tags = filter (~== TagText "") filteredTags where filteredTags = takeWhile (~/= "