Change type and HTTP library to improve encoding
This commit is contained in:
parent
5a02d92848
commit
f6782b32f5
15
Download.hs
15
Download.hs
@ -1,10 +1,15 @@
|
||||
module Download (downloadHtmlDef) where
|
||||
|
||||
import Network.HTTP
|
||||
import Data.ByteString.Lazy
|
||||
import Network.HTTP.Simple
|
||||
|
||||
baseUrl = "http://www.cnrtl.fr/definition/"
|
||||
|
||||
downloadHtmlDef :: String -> IO String
|
||||
downloadHtmlDef w =
|
||||
simpleHTTP (req) >>= getResponseBody
|
||||
where req = getRequest (baseUrl ++ w)
|
||||
downloadHtmlDef :: String -> IO ByteString
|
||||
downloadHtmlDef w = do
|
||||
request <- getRequest w
|
||||
response <- httpLBS (request)
|
||||
return (getResponseBody response)
|
||||
|
||||
getRequest :: String -> IO Request
|
||||
getRequest w = parseRequest (baseUrl ++ w)
|
||||
|
3
Main.hs
3
Main.hs
@ -3,9 +3,10 @@ module Main where
|
||||
import System.Environment
|
||||
import Download
|
||||
import ParseHtml
|
||||
import qualified Data.Text.Lazy.Encoding as E
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
args <- getArgs
|
||||
htmlResult <- downloadHtmlDef (head args)
|
||||
printDifferentDef htmlResult
|
||||
printDifferentDef (E.decodeUtf8 htmlResult)
|
||||
|
10
ParseHtml.hs
10
ParseHtml.hs
@ -1,16 +1,18 @@
|
||||
module ParseHtml (printDifferentDef, differentDefTags, parseDefTags) where
|
||||
|
||||
import Text.HTML.TagSoup
|
||||
import Prelude
|
||||
import qualified Data.Text.Lazy as T
|
||||
|
||||
data WordDefHeader = WordDefHeader WordName WordType
|
||||
data WordName = WordName String
|
||||
data WordType = WordType String
|
||||
|
||||
printDifferentDef :: String -> IO ()
|
||||
printDifferentDef :: T.Text -> IO ()
|
||||
printDifferentDef x = putStrLn (getDifferentDef x)
|
||||
|
||||
|
||||
getDifferentDef :: String -> String
|
||||
getDifferentDef :: T.Text -> String
|
||||
getDifferentDef x = getDifferentDefMessages (length xs) ++ renderWordList xs
|
||||
where xs = (parseDefTags . differentDefTags) x
|
||||
|
||||
@ -46,7 +48,7 @@ subTags xs = if (length xs) `mod` 3 /= 0
|
||||
then splitAt 2 xs
|
||||
else ((take 1 xs) ++ (take 1 (drop 2 xs)) , drop 3 xs)
|
||||
|
||||
differentDefTags :: String -> [Tag String]
|
||||
differentDefTags :: T.Text -> [Tag String]
|
||||
differentDefTags =
|
||||
filter (~== TagText "") . (takeWhile (~/= "<div id=contentbox>"))
|
||||
. (dropWhile (~/= "<div id=vtoolbar>")) . parseTags
|
||||
. (dropWhile (~/= "<div id=vtoolbar>")) . parseTags . T.unpack
|
||||
|
Loading…
Reference in New Issue
Block a user