72 lines
2.4 KiB
Haskell
72 lines
2.4 KiB
Haskell
module ParseHtml (printDef, printDifferentDef, filterSupTags, findRelevantTags) where
|
|
|
|
import Text.HTML.TagSoup
|
|
import Prelude
|
|
import WordDef
|
|
import qualified Data.List as L
|
|
import qualified Data.Text.Lazy as T
|
|
|
|
|
|
printDifferentDef :: T.Text -> IO ()
|
|
printDifferentDef x = putStrLn (getDifferentDef x)
|
|
|
|
getDifferentDef :: T.Text -> String
|
|
getDifferentDef x = getDifferentDefMessages (length xs) ++ renderHeaderList xs
|
|
where xs = (parseDefTags . differentDefTags) x
|
|
|
|
getDifferentDefMessages :: Int -> String
|
|
getDifferentDefMessages 0 = "Aucune définition disponible."
|
|
getDifferentDefMessages 1 = "Une définition disponible : \n"
|
|
getDifferentDefMessages _ = "Plusieurs définitions disponibles : \n"
|
|
|
|
parseDefTags :: [Tag String] -> [WordDefHeader]
|
|
parseDefTags [] = []
|
|
parseDefTags xs =
|
|
(WordDefHeader (WordName (renderTags (take 1 (fst st))))
|
|
(WordType (renderTags [(fst st) !! 1])))
|
|
: (parseDefTags (snd st))
|
|
where st = subTags xs
|
|
|
|
subTags :: [Tag String] -> ([Tag String],[Tag String])
|
|
subTags [] = ([],[])
|
|
subTags xs = if (length xs) `mod` 3 /= 0
|
|
then splitAt 2 xs
|
|
else ((take 1 xs) ++ (take 1 (drop 2 xs)) , drop 3 xs)
|
|
|
|
differentDefTags :: T.Text -> [Tag String]
|
|
differentDefTags =
|
|
filter (~== TagText "") . (takeWhile (~/= "<div id=contentbox>"))
|
|
. (dropWhile (~/= "<div id=vtoolbar>")) . parseTags . T.unpack
|
|
|
|
printDef :: T.Text -> Int -> IO ()
|
|
printDef t n = putStrLn (getDef t n)
|
|
|
|
getDef :: T.Text -> Int -> String
|
|
getDef t n = renderFullDef (getFullDef t n)
|
|
|
|
getFullDef :: T.Text -> Int -> WordFullDef
|
|
getFullDef t n = WordFullDef header defList
|
|
where
|
|
header = ((parseDefTags . differentDefTags) t !! n)
|
|
defList = map (WordDefSentence . renderTags) [parseFullDefTags t]
|
|
|
|
parseFullDefTags :: T.Text -> [Tag String]
|
|
parseFullDefTags = findRelevantTags . parseTags . T.unpack
|
|
|
|
findRelevantTags :: [Tag String] -> [Tag String]
|
|
findRelevantTags tags =
|
|
filter (~== TagText "") filteredTags
|
|
where
|
|
filteredTags =
|
|
takeWhile (~/= "<div id=footer>") $
|
|
dropWhile (~/= "<span class=tlf_cdefinition>") $
|
|
filterSupTags tags
|
|
|
|
filterSupTags :: [Tag String] -> [Tag String]
|
|
filterSupTags tags = filter (/= TagText "willBeDeleted") $ snd $ L.mapAccumL transformSupText (TagText "") tags
|
|
where
|
|
transformSupText x y
|
|
| y ~== "<sup>" = (TagText "willBeDeleted", TagText "willBeDeleted")
|
|
| x == TagText "willBeDeleted" = (TagText "", TagText "willBeDeleted")
|
|
| otherwise = (x,y)
|