Improve parsing
This commit is contained in:
parent
3a38612d1d
commit
01cc700706
22
ParseHtml.hs
22
ParseHtml.hs
@ -1,4 +1,4 @@
|
||||
module ParseHtml (printDef, printDifferentDef) where
|
||||
module ParseHtml (printDef, printDifferentDef, filterSupTags, findRelevantTags) where
|
||||
|
||||
import Text.HTML.TagSoup
|
||||
import Prelude
|
||||
@ -54,10 +54,18 @@ parseFullDefTags :: T.Text -> [Tag String]
|
||||
parseFullDefTags = findRelevantTags . parseTags . T.unpack
|
||||
|
||||
findRelevantTags :: [Tag String] -> [Tag String]
|
||||
findRelevantTags [] = []
|
||||
findRelevantTags (x:xs) = if x ~== "<span class=tlf_cdefinition"
|
||||
|| x ~== "<span class=tlf_cexemple"
|
||||
then maybe (f xs) (\y -> y:(f xs)) (L.find (~== TagText "") xs)
|
||||
else (f xs)
|
||||
findRelevantTags tags =
|
||||
filter (~== TagText "") filteredTags
|
||||
where
|
||||
f = findRelevantTags
|
||||
filteredTags =
|
||||
takeWhile (~/= "<div id=footer>") $
|
||||
dropWhile (~/= "<span class=tlf_cdefinition>") $
|
||||
filterSupTags tags
|
||||
|
||||
filterSupTags :: [Tag String] -> [Tag String]
|
||||
filterSupTags tags = filter (/= TagText "willBeDeleted") $ snd $ L.mapAccumL transformSupText (TagText "") tags
|
||||
where
|
||||
transformSupText x y
|
||||
| y ~== "<sup>" = (TagText "willBeDeleted", TagText "willBeDeleted")
|
||||
| x == TagText "willBeDeleted" = (TagText "", TagText "willBeDeleted")
|
||||
| otherwise = (x,y)
|
||||
|
Loading…
Reference in New Issue
Block a user