Improve parsing
This commit is contained in:
parent
3a38612d1d
commit
01cc700706
22
ParseHtml.hs
22
ParseHtml.hs
@ -1,4 +1,4 @@
|
|||||||
module ParseHtml (printDef, printDifferentDef) where
|
module ParseHtml (printDef, printDifferentDef, filterSupTags, findRelevantTags) where
|
||||||
|
|
||||||
import Text.HTML.TagSoup
|
import Text.HTML.TagSoup
|
||||||
import Prelude
|
import Prelude
|
||||||
@ -54,10 +54,18 @@ parseFullDefTags :: T.Text -> [Tag String]
|
|||||||
parseFullDefTags = findRelevantTags . parseTags . T.unpack
|
parseFullDefTags = findRelevantTags . parseTags . T.unpack
|
||||||
|
|
||||||
findRelevantTags :: [Tag String] -> [Tag String]
|
findRelevantTags :: [Tag String] -> [Tag String]
|
||||||
findRelevantTags [] = []
|
findRelevantTags tags =
|
||||||
findRelevantTags (x:xs) = if x ~== "<span class=tlf_cdefinition"
|
filter (~== TagText "") filteredTags
|
||||||
|| x ~== "<span class=tlf_cexemple"
|
|
||||||
then maybe (f xs) (\y -> y:(f xs)) (L.find (~== TagText "") xs)
|
|
||||||
else (f xs)
|
|
||||||
where
|
where
|
||||||
f = findRelevantTags
|
filteredTags =
|
||||||
|
takeWhile (~/= "<div id=footer>") $
|
||||||
|
dropWhile (~/= "<span class=tlf_cdefinition>") $
|
||||||
|
filterSupTags tags
|
||||||
|
|
||||||
|
filterSupTags :: [Tag String] -> [Tag String]
|
||||||
|
filterSupTags tags = filter (/= TagText "willBeDeleted") $ snd $ L.mapAccumL transformSupText (TagText "") tags
|
||||||
|
where
|
||||||
|
transformSupText x y
|
||||||
|
| y ~== "<sup>" = (TagText "willBeDeleted", TagText "willBeDeleted")
|
||||||
|
| x == TagText "willBeDeleted" = (TagText "", TagText "willBeDeleted")
|
||||||
|
| otherwise = (x,y)
|
||||||
|
Loading…
Reference in New Issue
Block a user