Change type and HTTP library to improve encoding
This commit is contained in:
parent
5a02d92848
commit
f6782b32f5
15
Download.hs
15
Download.hs
@ -1,10 +1,15 @@
|
|||||||
module Download (downloadHtmlDef) where
|
module Download (downloadHtmlDef) where
|
||||||
|
|
||||||
import Network.HTTP
|
import Data.ByteString.Lazy
|
||||||
|
import Network.HTTP.Simple
|
||||||
|
|
||||||
baseUrl = "http://www.cnrtl.fr/definition/"
|
baseUrl = "http://www.cnrtl.fr/definition/"
|
||||||
|
|
||||||
downloadHtmlDef :: String -> IO String
|
downloadHtmlDef :: String -> IO ByteString
|
||||||
downloadHtmlDef w =
|
downloadHtmlDef w = do
|
||||||
simpleHTTP (req) >>= getResponseBody
|
request <- getRequest w
|
||||||
where req = getRequest (baseUrl ++ w)
|
response <- httpLBS (request)
|
||||||
|
return (getResponseBody response)
|
||||||
|
|
||||||
|
getRequest :: String -> IO Request
|
||||||
|
getRequest w = parseRequest (baseUrl ++ w)
|
||||||
|
3
Main.hs
3
Main.hs
@ -3,9 +3,10 @@ module Main where
|
|||||||
import System.Environment
|
import System.Environment
|
||||||
import Download
|
import Download
|
||||||
import ParseHtml
|
import ParseHtml
|
||||||
|
import qualified Data.Text.Lazy.Encoding as E
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = do
|
main = do
|
||||||
args <- getArgs
|
args <- getArgs
|
||||||
htmlResult <- downloadHtmlDef (head args)
|
htmlResult <- downloadHtmlDef (head args)
|
||||||
printDifferentDef htmlResult
|
printDifferentDef (E.decodeUtf8 htmlResult)
|
||||||
|
10
ParseHtml.hs
10
ParseHtml.hs
@ -1,16 +1,18 @@
|
|||||||
module ParseHtml (printDifferentDef, differentDefTags, parseDefTags) where
|
module ParseHtml (printDifferentDef, differentDefTags, parseDefTags) where
|
||||||
|
|
||||||
import Text.HTML.TagSoup
|
import Text.HTML.TagSoup
|
||||||
|
import Prelude
|
||||||
|
import qualified Data.Text.Lazy as T
|
||||||
|
|
||||||
data WordDefHeader = WordDefHeader WordName WordType
|
data WordDefHeader = WordDefHeader WordName WordType
|
||||||
data WordName = WordName String
|
data WordName = WordName String
|
||||||
data WordType = WordType String
|
data WordType = WordType String
|
||||||
|
|
||||||
printDifferentDef :: String -> IO ()
|
printDifferentDef :: T.Text -> IO ()
|
||||||
printDifferentDef x = putStrLn (getDifferentDef x)
|
printDifferentDef x = putStrLn (getDifferentDef x)
|
||||||
|
|
||||||
|
|
||||||
getDifferentDef :: String -> String
|
getDifferentDef :: T.Text -> String
|
||||||
getDifferentDef x = getDifferentDefMessages (length xs) ++ renderWordList xs
|
getDifferentDef x = getDifferentDefMessages (length xs) ++ renderWordList xs
|
||||||
where xs = (parseDefTags . differentDefTags) x
|
where xs = (parseDefTags . differentDefTags) x
|
||||||
|
|
||||||
@ -46,7 +48,7 @@ subTags xs = if (length xs) `mod` 3 /= 0
|
|||||||
then splitAt 2 xs
|
then splitAt 2 xs
|
||||||
else ((take 1 xs) ++ (take 1 (drop 2 xs)) , drop 3 xs)
|
else ((take 1 xs) ++ (take 1 (drop 2 xs)) , drop 3 xs)
|
||||||
|
|
||||||
differentDefTags :: String -> [Tag String]
|
differentDefTags :: T.Text -> [Tag String]
|
||||||
differentDefTags =
|
differentDefTags =
|
||||||
filter (~== TagText "") . (takeWhile (~/= "<div id=contentbox>"))
|
filter (~== TagText "") . (takeWhile (~/= "<div id=contentbox>"))
|
||||||
. (dropWhile (~/= "<div id=vtoolbar>")) . parseTags
|
. (dropWhile (~/= "<div id=vtoolbar>")) . parseTags . T.unpack
|
||||||
|
Loading…
Reference in New Issue
Block a user