Add simple parse html

This commit is contained in:
artlef 2018-05-01 15:13:49 +02:00
parent 7105cc28d2
commit e0f75fe5b8
3 changed files with 20 additions and 2 deletions

View File

@ -2,9 +2,10 @@ module Main where
import System.Environment import System.Environment
import Download import Download
import ParseHtml
main :: IO () main :: IO ()
main = do main = do
args <- getArgs args <- getArgs
htmlResult <- downloadHtmlDef (head args) htmlResult <- downloadHtmlDef (head args)
putStrLn htmlResult printDifferentDef htmlResult

17
ParseHtml.hs Normal file
View File

@ -0,0 +1,17 @@
module ParseHtml (printDifferentDef) where
import Text.HTML.TagSoup
-- | Print to standard output the message built by
-- 'getDifferentDefMessages' from the given HTML document.
printDifferentDef :: String -> IO ()
printDifferentDef = putStrLn . getDifferentDefMessages
-- | Select the tags of interest with 'differentDefTags' and turn them
-- back into a single 'String' with 'renderTags'.
--
-- NOTE(review): 'renderTags' is an HTML renderer, so it presumably
-- re-escapes special characters in the text; confirm that this is the
-- desired output for a terminal.
getDifferentDefMessages :: String -> String
getDifferentDefMessages = renderTags . differentDefTags
-- | Parse an HTML document and keep only the text tags found between
-- the @\<div id=vtoolbar\>@ opening tag and the @\<div id=contentbox\>@
-- opening tag.
--
-- If the @vtoolbar@ div is absent the result is empty; if the
-- @contentbox@ div is absent, everything after @vtoolbar@ is scanned.
differentDefTags :: String -> [Tag String]
differentDefTags html = filter (~== TagText "") betweenMarkers
  where
    -- Everything from the vtoolbar div (inclusive) to the end of the page.
    fromToolbar = dropWhile (~/= "<div id=vtoolbar>") (parseTags html)
    -- Cut the stream just before the contentbox div.
    betweenMarkers = takeWhile (~/= "<div id=contentbox>") fromToolbar
-- | Compute @(length tags + 1) \`div\` 3@ for the given tag list.
--
-- NOTE(review): presumably each definition contributes a fixed group of
-- text tags, which would explain the divisor 3 -- confirm against the
-- page layout. This helper is not in the module's export list.
numberOfDef :: [Tag String] -> Int
numberOfDef tags = (tagCount + 1) `div` 3
  where
    tagCount = length tags

View File

@ -2,7 +2,7 @@ CC=ghc
ARGS=--make -dynamic ARGS=--make -dynamic
FILENAME=dictfr FILENAME=dictfr
all: all:
$(CC) $(ARGS) -o $(FILENAME) Main.hs Download.hs $(CC) $(ARGS) -o $(FILENAME) Main.hs Download.hs ParseHtml.hs
clean: clean:
rm -f *.o *.hi rm -f *.o *.hi