package babelio import ( "errors" "fmt" "io" "net/http" "strings" "git.artlef.fr/bibliomane/internal/callapiutils" "git.artlef.fr/bibliomane/internal/myvalidator" "github.com/PuerkitoBio/goquery" "golang.org/x/text/encoding/charmap" ) type babelioSearchArg struct { Term string `json:"term"` } type babelioSearchResult struct { //only parsing the url Url string `json:"url"` } func GetDescriptionFromISBN(baseUrl string, isbn string) (string, error) { url, err := searchPageIsbn(baseUrl, isbn) if err != nil { return "", err } //we either find the full summary, or we have to make another call to get it. fullSummary, payloadToQuery, err := parseBookPage(baseUrl, url) if err != nil { return "", err } if fullSummary != "" { return decodeAndCleanText(strings.NewReader(fullSummary)), err } else if payloadToQuery != "" { return queryDescription(baseUrl, payloadToQuery) } else { return "", nil } } func searchPageIsbn(baseUrl, isbn string) (string, error) { searchUrl, err := callapiutils.ComputeUrl(baseUrl, "aj_recherche.php") if err != nil { return "", err } term := babelioSearchArg{Term: isbn} var searchResults []babelioSearchResult callapiutils.FetchAndParseResultFromPost(searchUrl, &term, &searchResults) if len(searchResults) == 0 { return "", myvalidator.TranslatedError{Err: errors.New("ISBNNotFoundBabelio"), Arg: isbn} } return searchResults[0].Url, nil } func parseBookPage(baseUrl, bookUrl string) (string, string, error) { url, err := callapiutils.ComputeUrl(baseUrl, bookUrl) if err != nil { return "", "", err } resp, err := http.Get(url.String()) if err != nil { return "", "", err } defer resp.Body.Close() doc, err := goquery.NewDocumentFromReader(resp.Body) //we either find the full summary, or we have to make another call to get it. fullsummary := "" jsToParse := "" doc.Find(".livre_resume").Each(func(i int, s *goquery.Selection) { onclick, ok := s.Find("a").Attr("onclick") if ok { jsToParse = onclick } else { fullsummary = s.Text() } }) if fullsummary != "" { return fullsummary, "", nil } typeStr, idObj, err := extractNumbersFromExpression(jsToParse) if err != nil { return "", "", err } return "", fmt.Sprintf("type=%s&id_obj=%s", typeStr, idObj), nil } func extractNumbersFromExpression(jsToParse string) (string, string, error) { splitted := strings.Split(jsToParse, ",") if len(splitted) < 3 { return "", "", myvalidator.TranslatedError{Err: errors.New("BabelioParseError")} } if len(splitted[2]) < 3 { return "", "", myvalidator.TranslatedError{Err: errors.New("BabelioParseError")} } return splitted[1], splitted[2][:len(splitted[2])-2], nil } func queryDescription(baseUrl string, payloadToQuery string) (string, error) { url, err := callapiutils.ComputeUrl(baseUrl, "aj_voir_plus_a.php") if err != nil { return "", err } resp, err := http.Post(url.String(), "application/x-www-form-urlencoded; charset=UTF-8", strings.NewReader(payloadToQuery)) if err != nil { return "", err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return "", myvalidator.TranslatedError{Err: fmt.Errorf("BabelioFetchDescError")} } return decodeAndCleanText(resp.Body), nil } func decodeAndCleanText(reader io.Reader) string { tr := charmap.Windows1252.NewDecoder().Reader(reader) var decodedString strings.Builder io.Copy(&decodedString, tr) return strings.TrimSpace(strings.ReplaceAll(decodedString.String(), "
", "\n")) }