Change type and HTTP library to improve encoding

This commit is contained in:
artlef 2018-05-12 16:31:17 +02:00
parent 5a02d92848
commit f6782b32f5
3 changed files with 18 additions and 10 deletions

View File

@ -1,10 +1,15 @@
module Download (downloadHtmlDef) where
import Network.HTTP
import Data.ByteString.Lazy
import Network.HTTP.Simple
baseUrl = "http://www.cnrtl.fr/definition/"
downloadHtmlDef :: String -> IO String
downloadHtmlDef w =
simpleHTTP (req) >>= getResponseBody
where req = getRequest (baseUrl ++ w)
downloadHtmlDef :: String -> IO ByteString
downloadHtmlDef w = do
request <- getRequest w
response <- httpLBS (request)
return (getResponseBody response)
getRequest :: String -> IO Request
getRequest w = parseRequest (baseUrl ++ w)

View File

@ -3,9 +3,10 @@ module Main where
import System.Environment
import Download
import ParseHtml
import qualified Data.Text.Lazy.Encoding as E
main :: IO ()
main = do
args <- getArgs
htmlResult <- downloadHtmlDef (head args)
printDifferentDef htmlResult
printDifferentDef (E.decodeUtf8 htmlResult)

View File

@ -1,16 +1,18 @@
module ParseHtml (printDifferentDef, differentDefTags, parseDefTags) where
import Text.HTML.TagSoup
import Prelude
import qualified Data.Text.Lazy as T
data WordDefHeader = WordDefHeader WordName WordType
data WordName = WordName String
data WordType = WordType String
printDifferentDef :: String -> IO ()
printDifferentDef :: T.Text -> IO ()
printDifferentDef x = putStrLn (getDifferentDef x)
getDifferentDef :: String -> String
getDifferentDef :: T.Text -> String
getDifferentDef x = getDifferentDefMessages (length xs) ++ renderWordList xs
where xs = (parseDefTags . differentDefTags) x
@ -46,7 +48,7 @@ subTags xs = if (length xs) `mod` 3 /= 0
then splitAt 2 xs
else ((take 1 xs) ++ (take 1 (drop 2 xs)) , drop 3 xs)
differentDefTags :: String -> [Tag String]
differentDefTags :: T.Text -> [Tag String]
differentDefTags =
filter (~== TagText "") . (takeWhile (~/= "<div id=contentbox>"))
. (dropWhile (~/= "<div id=vtoolbar>")) . parseTags
. (dropWhile (~/= "<div id=vtoolbar>")) . parseTags . T.unpack