Files
bibliomane/internal/babelio/babelio.go

129 lines
3.4 KiB
Go

package babelio
import (
"errors"
"fmt"
"io"
"net/http"
"strings"
"git.artlef.fr/bibliomane/internal/callapiutils"
"git.artlef.fr/bibliomane/internal/myvalidator"
"github.com/PuerkitoBio/goquery"
"golang.org/x/text/encoding/charmap"
)
// babelioSearchArg is the request body posted to the Babelio search endpoint.
// searchPageIsbn fills Term with the ISBN being looked up; it is serialized
// under the JSON key "term".
type babelioSearchArg struct {
Term string `json:"term"`
}
// babelioSearchResult is one entry of the search response.
// Only the "url" key is mapped; no other response field is declared here.
type babelioSearchResult struct {
// Url locates the book page; searchPageIsbn returns it for the first result.
Url string `json:"url"`
}
// GetDescriptionFromISBN looks up isbn on the Babelio site rooted at baseUrl
// and returns the book description as cleaned text.
//
// The lookup runs in up to three steps: find the book page through the
// search endpoint, parse that page, and, when the page only carries a link
// to the full summary, issue one more request to fetch it.
//
// It returns an empty string and a nil error when the book page yields
// neither a summary nor a follow-up payload. Errors from any step are
// returned unchanged.
func GetDescriptionFromISBN(baseUrl string, isbn string) (string, error) {
	bookPath, err := searchPageIsbn(baseUrl, isbn)
	if err != nil {
		return "", err
	}

	// The page either holds the whole summary or the payload needed to ask for it.
	summary, followUpPayload, err := parseBookPage(baseUrl, bookPath)
	if err != nil {
		return "", err
	}

	switch {
	case summary != "":
		return decodeAndCleanText(strings.NewReader(summary)), nil
	case followUpPayload != "":
		return queryDescription(baseUrl, followUpPayload)
	default:
		return "", nil
	}
}
// searchPageIsbn queries the "aj_recherche.php" endpoint under baseUrl with
// isbn as the search term and returns the Url of the first result.
//
// When the result list is empty it returns a TranslatedError carrying
// "ISBNNotFoundBabelio".
//
// NOTE(review): anything returned by FetchAndParseResultFromPost is discarded
// here, so a failed request presumably also ends up as "ISBNNotFoundBabelio".
// Confirm against that helper's signature.
func searchPageIsbn(baseUrl, isbn string) (string, error) {
	endpoint, err := callapiutils.ComputeUrl(baseUrl, "aj_recherche.php")
	if err != nil {
		return "", err
	}

	var hits []babelioSearchResult
	query := babelioSearchArg{Term: isbn}
	callapiutils.FetchAndParseResultFromPost(endpoint, &query, &hits)

	if len(hits) > 0 {
		return hits[0].Url, nil
	}
	return "", myvalidator.TranslatedError{Err: errors.New("ISBNNotFoundBabelio")}
}
// parseBookPage downloads the book page identified by bookUrl (combined with
// baseUrl through callapiutils.ComputeUrl) and inspects every ".livre_resume"
// element.
//
// It returns one of:
//   - (summary, "", nil) when an element has no link with an "onclick"
//     attribute, in which case its text is taken as the full summary;
//   - ("", payload, nil) otherwise, where payload is a form-encoded string
//     "type=<n>&id_obj=<n>" built from the "onclick" expression.
//
// An error is returned when the URL cannot be computed, the GET request
// fails, the HTML cannot be parsed, or the "onclick" expression does not have
// the expected shape (this includes pages with no ".livre_resume" element).
func parseBookPage(baseUrl, bookUrl string) (string, string, error) {
	url, err := callapiutils.ComputeUrl(baseUrl, bookUrl)
	if err != nil {
		return "", "", err
	}
	resp, err := http.Get(url.String())
	if err != nil {
		return "", "", err
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		// A failed parse yields a nil document; bail out before calling Find on it.
		return "", "", err
	}

	// We either find the full summary, or we have to make another call to get it.
	fullsummary := ""
	jsToParse := ""
	doc.Find(".livre_resume").Each(func(i int, s *goquery.Selection) {
		if onclick, ok := s.Find("a").Attr("onclick"); ok {
			jsToParse = onclick
		} else {
			fullsummary = s.Text()
		}
	})
	if fullsummary != "" {
		return fullsummary, "", nil
	}

	typeStr, idObj, err := extractNumbersFromExpression(jsToParse)
	if err != nil {
		return "", "", err
	}
	return "", fmt.Sprintf("type=%s&id_obj=%s", typeStr, idObj), nil
}
// extractNumbersFromExpression pulls two arguments out of a comma-separated
// expression taken from an "onclick" attribute.
//
// The input is split on ","; the second field is returned as-is and the third
// field is returned with its last two bytes removed (presumably the closing
// ");" of the JavaScript call — confirm against real pages).
//
// A TranslatedError carrying "BabelioParseError" is returned when there are
// fewer than three fields or the third field is shorter than three bytes.
func extractNumbersFromExpression(jsToParse string) (string, string, error) {
	fields := strings.Split(jsToParse, ",")
	// Short-circuit keeps fields[2] from being read when it does not exist.
	if len(fields) < 3 || len(fields[2]) < 3 {
		return "", "", myvalidator.TranslatedError{Err: errors.New("BabelioParseError")}
	}

	third := fields[2]
	return fields[1], third[:len(third)-2], nil
}
// queryDescription posts payloadToQuery as a form-encoded body to the
// "aj_voir_plus_a.php" endpoint under baseUrl and returns the response body
// after decoding and cleaning it with decodeAndCleanText.
//
// Errors from building the URL or sending the request are returned unchanged.
// Any status other than 200 OK yields a TranslatedError carrying
// "BabelioFetchDescError".
func queryDescription(baseUrl string, payloadToQuery string) (string, error) {
	endpoint, err := callapiutils.ComputeUrl(baseUrl, "aj_voir_plus_a.php")
	if err != nil {
		return "", err
	}

	const formContentType = "application/x-www-form-urlencoded; charset=UTF-8"
	form := strings.NewReader(payloadToQuery)
	resp, err := http.Post(endpoint.String(), formContentType, form)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		return decodeAndCleanText(resp.Body), nil
	}
	return "", myvalidator.TranslatedError{Err: fmt.Errorf("BabelioFetchDescError")}
}
// decodeAndCleanText reads everything from reader through a Windows-1252
// decoder, replaces each literal "<br>" with a newline, and trims leading and
// trailing whitespace from the result.
//
// The copy error is not checked: whatever was decoded before a read failure
// is still cleaned and returned.
func decodeAndCleanText(reader io.Reader) string {
	var text strings.Builder
	decoded := charmap.Windows1252.NewDecoder().Reader(reader)
	io.Copy(&text, decoded)

	withNewlines := strings.ReplaceAll(text.String(), "<br>", "\n")
	return strings.TrimSpace(withNewlines)
}