1
0

Only extract the first element as title

This commit is contained in:
2025-06-04 13:51:29 +02:00
parent a5fec64225
commit fec68c9645
3 changed files with 37 additions and 2 deletions

View File

@@ -7,7 +7,6 @@ package readability
import (
"bytes"
"fmt"
"golang.org/x/text/encoding/htmlindex"
"io"
"log"
"math"
@@ -15,6 +14,8 @@ import (
"regexp"
"strings"
"golang.org/x/text/encoding/htmlindex"
"github.com/PuerkitoBio/goquery"
"golang.org/x/net/html"
)
@@ -129,7 +130,7 @@ func readDocumentWithEncoding(content []byte) (*goquery.Document, error) {
}
// getTitle returns the text content of the document's <title> element.
//
// NOTE(review): this block is shown as a diff hunk with the +/- markers
// stripped, so the pre-change and post-change assignments appear back to
// back; only one of them exists in the file at any revision.
func getTitle(document *goquery.Document) string {
// Pre-change line: reads Text() from the whole Find("title") selection, i.e.
// from every matched <title> element, not only the first one.
title := document.Find("title").Text()
// Post-change line: narrows the selection with First() before reading the
// text, matching the commit subject "Only extract the first element as title".
title := document.Find("title").First().Text()
return title
}

25
readability_test.go Normal file
View File

@@ -0,0 +1,25 @@
package readability_test
import (
"bytes"
"os"
"testing"
"git.kopis.de/carsten/readability"
)
// TestFullHtml feeds the contents of test1.html to readability.ExtractContent
// and checks that the returned title is exactly "The Verge".
func TestFullHtml(t *testing.T) {
	const fixture = "test1.html"

	b, err := os.ReadFile(fixture)
	if err != nil {
		// Report the underlying error so a missing fixture can be told apart
		// from, say, a permission problem.
		t.Fatalf("Can not read file %s: %v", fixture, err)
	}

	reader := bytes.NewReader(b)
	// The second return value is not examined by this test.
	title, _, err := readability.ExtractContent(".", reader)
	if err != nil {
		t.Fatalf("Can not extract content: %v", err)
	}

	const want = "The Verge"
	if title != want {
		// %q keeps empty strings and stray whitespace visible in the failure output.
		t.Fatalf("Unexpected title: got %q, want %q", title, want)
	}
}

9
test1.html Normal file

File diff suppressed because one or more lines are too long