get tags for full definition

This commit is contained in:
artlef 2018-05-28 23:15:16 +02:00
parent 2d76314eaf
commit 221e5a60cf
2 changed files with 22 additions and 11 deletions

View File

@ -15,7 +15,8 @@ parse ("-d":[]) = usage
parse ("-d":n:[]) = usage parse ("-d":n:[]) = usage
parse ("-d":n:fs) = do parse ("-d":n:fs) = do
htmlResult <- downloadHtmlDef n (head fs) htmlResult <- downloadHtmlDef n (head fs)
printDef (E.decodeUtf8 htmlResult) let n' = read n :: Int
printDef (E.decodeUtf8 htmlResult) n'
parse fs = do parse fs = do
htmlResult <- downloadHtmlAvailableDef (head fs) htmlResult <- downloadHtmlAvailableDef (head fs)
printDifferentDef (E.decodeUtf8 htmlResult) printDifferentDef (E.decodeUtf8 htmlResult)

View File

@ -3,6 +3,7 @@ module ParseHtml (printDef, printDifferentDef) where
import Text.HTML.TagSoup import Text.HTML.TagSoup
import Prelude import Prelude
import WordDef import WordDef
import qualified Data.List as L
import qualified Data.Text.Lazy as T import qualified Data.Text.Lazy as T
@ -37,17 +38,26 @@ differentDefTags =
filter (~== TagText "") . (takeWhile (~/= "<div id=contentbox>")) filter (~== TagText "") . (takeWhile (~/= "<div id=contentbox>"))
. (dropWhile (~/= "<div id=vtoolbar>")) . parseTags . T.unpack . (dropWhile (~/= "<div id=vtoolbar>")) . parseTags . T.unpack
printDef :: T.Text -> IO () printDef :: T.Text -> Int -> IO ()
printDef = putStrLn . getDef printDef t n = putStrLn (getDef t n)
getDef :: T.Text -> String getDef :: T.Text -> Int -> String
getDef = (renderFullDef . getFullDef . parseFullDefTags) getDef t n = renderFullDef (getFullDef t n)
getFullDef :: [Tag String] -> WordFullDef getFullDef :: T.Text -> Int -> WordFullDef
getFullDef x = WordFullDef (WordDefHeader (WordName "lol") (WordType getFullDef t n = WordFullDef header defList
"Interjection")) [WordDefSentence "Laughing Out Loud", where
WordExampleSentence "ur wrong lol"] header = ((parseDefTags . differentDefTags) t !! (n-1))
defList = map (WordDefSentence . renderTags) [parseFullDefTags t]
parseFullDefTags :: T.Text -> [Tag String] parseFullDefTags :: T.Text -> [Tag String]
parseFullDefTags = filter (~== "<span class=tlf_cdefinition") . parseTags parseFullDefTags = findRelevantTags . parseTags . T.unpack
. T.unpack
findRelevantTags :: [Tag String] -> [Tag String]
findRelevantTags [] = []
findRelevantTags (x:xs) = if x ~== "<span class=tlf_cdefinition"
|| x ~== "<span class=tlf_cexemple"
then maybe (f xs) (\y -> y:(f xs)) (L.find (~== TagText "") xs)
else (f xs)
where
f = findRelevantTags