Change type and HTTP library to improve encoding

This commit is contained in:
artlef 2018-05-12 16:31:17 +02:00
parent 5a02d92848
commit f6782b32f5
3 changed files with 18 additions and 10 deletions

View File

@ -1,10 +1,15 @@
module Download (downloadHtmlDef) where module Download (downloadHtmlDef) where
import Network.HTTP import Data.ByteString.Lazy
import Network.HTTP.Simple
baseUrl = "http://www.cnrtl.fr/definition/" baseUrl = "http://www.cnrtl.fr/definition/"
downloadHtmlDef :: String -> IO String downloadHtmlDef :: String -> IO ByteString
downloadHtmlDef w = downloadHtmlDef w = do
simpleHTTP (req) >>= getResponseBody request <- getRequest w
where req = getRequest (baseUrl ++ w) response <- httpLBS (request)
return (getResponseBody response)
getRequest :: String -> IO Request
getRequest w = parseRequest (baseUrl ++ w)

View File

@ -3,9 +3,10 @@ module Main where
import System.Environment import System.Environment
import Download import Download
import ParseHtml import ParseHtml
import qualified Data.Text.Lazy.Encoding as E
main :: IO () main :: IO ()
main = do main = do
args <- getArgs args <- getArgs
htmlResult <- downloadHtmlDef (head args) htmlResult <- downloadHtmlDef (head args)
printDifferentDef htmlResult printDifferentDef (E.decodeUtf8 htmlResult)

View File

@ -1,16 +1,18 @@
module ParseHtml (printDifferentDef, differentDefTags, parseDefTags) where module ParseHtml (printDifferentDef, differentDefTags, parseDefTags) where
import Text.HTML.TagSoup import Text.HTML.TagSoup
import Prelude
import qualified Data.Text.Lazy as T
data WordDefHeader = WordDefHeader WordName WordType data WordDefHeader = WordDefHeader WordName WordType
data WordName = WordName String data WordName = WordName String
data WordType = WordType String data WordType = WordType String
printDifferentDef :: String -> IO () printDifferentDef :: T.Text -> IO ()
printDifferentDef x = putStrLn (getDifferentDef x) printDifferentDef x = putStrLn (getDifferentDef x)
getDifferentDef :: String -> String getDifferentDef :: T.Text -> String
getDifferentDef x = getDifferentDefMessages (length xs) ++ renderWordList xs getDifferentDef x = getDifferentDefMessages (length xs) ++ renderWordList xs
where xs = (parseDefTags . differentDefTags) x where xs = (parseDefTags . differentDefTags) x
@ -46,7 +48,7 @@ subTags xs = if (length xs) `mod` 3 /= 0
then splitAt 2 xs then splitAt 2 xs
else ((take 1 xs) ++ (take 1 (drop 2 xs)) , drop 3 xs) else ((take 1 xs) ++ (take 1 (drop 2 xs)) , drop 3 xs)
differentDefTags :: String -> [Tag String] differentDefTags :: T.Text -> [Tag String]
differentDefTags = differentDefTags =
filter (~== TagText "") . (takeWhile (~/= "<div id=contentbox>")) filter (~== TagText "") . (takeWhile (~/= "<div id=contentbox>"))
. (dropWhile (~/= "<div id=vtoolbar>")) . parseTags . (dropWhile (~/= "<div id=vtoolbar>")) . parseTags . T.unpack