diff --git a/Download.hs b/Download.hs index 9c021a6..4147c2a 100644 --- a/Download.hs +++ b/Download.hs @@ -1,10 +1,15 @@ module Download (downloadHtmlDef) where -import Network.HTTP +import Data.ByteString.Lazy +import Network.HTTP.Simple baseUrl = "http://www.cnrtl.fr/definition/" -downloadHtmlDef :: String -> IO String -downloadHtmlDef w = - simpleHTTP (req) >>= getResponseBody - where req = getRequest (baseUrl ++ w) +downloadHtmlDef :: String -> IO ByteString +downloadHtmlDef w = do + request <- getRequest w + response <- httpLBS (request) + return (getResponseBody response) + +getRequest :: String -> IO Request +getRequest w = parseRequest (baseUrl ++ w) diff --git a/Main.hs b/Main.hs index 31671fc..7731d5b 100644 --- a/Main.hs +++ b/Main.hs @@ -3,9 +3,10 @@ module Main where import System.Environment import Download import ParseHtml +import qualified Data.Text.Lazy.Encoding as E main :: IO () main = do args <- getArgs htmlResult <- downloadHtmlDef (head args) - printDifferentDef htmlResult + printDifferentDef (E.decodeUtf8 htmlResult) diff --git a/ParseHtml.hs b/ParseHtml.hs index 9d7f758..62a92bf 100644 --- a/ParseHtml.hs +++ b/ParseHtml.hs @@ -1,16 +1,18 @@ module ParseHtml (printDifferentDef, differentDefTags, parseDefTags) where import Text.HTML.TagSoup +import Prelude +import qualified Data.Text.Lazy as T data WordDefHeader = WordDefHeader WordName WordType data WordName = WordName String data WordType = WordType String -printDifferentDef :: String -> IO () +printDifferentDef :: T.Text -> IO () printDifferentDef x = putStrLn (getDifferentDef x) -getDifferentDef :: String -> String +getDifferentDef :: T.Text -> String getDifferentDef x = getDifferentDefMessages (length xs) ++ renderWordList xs where xs = (parseDefTags . differentDefTags) x @@ -46,7 +48,7 @@ subTags xs = if (length xs) `mod` 3 /= 0 then splitAt 2 xs else ((take 1 xs) ++ (take 1 (drop 2 xs)) , drop 3 xs) -differentDefTags :: String -> [Tag String] +differentDefTags :: T.Text -> [Tag String] differentDefTags = filter (~== TagText "") . (takeWhile (~/= "