Only extract the first element as title
This commit is contained in:
@@ -7,7 +7,6 @@ package readability
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"golang.org/x/text/encoding/htmlindex"
|
||||
"io"
|
||||
"log"
|
||||
"math"
|
||||
@@ -15,6 +14,8 @@ import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/encoding/htmlindex"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
@@ -129,7 +130,7 @@ func readDocumentWithEncoding(content []byte) (*goquery.Document, error) {
|
||||
}
|
||||
|
||||
func getTitle(document *goquery.Document) string {
|
||||
title := document.Find("title").Text()
|
||||
title := document.Find("title").First().Text()
|
||||
return title
|
||||
}
|
||||
|
||||
|
||||
25
readability_test.go
Normal file
25
readability_test.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package readability_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"git.kopis.de/carsten/readability"
|
||||
)
|
||||
|
||||
func TestFullHtml(t *testing.T) {
|
||||
b, err := os.ReadFile("test1.html")
|
||||
if err != nil {
|
||||
t.Fatal("Can not read file")
|
||||
}
|
||||
|
||||
reader := bytes.NewReader(b)
|
||||
title, _, err := readability.ExtractContent(".", reader)
|
||||
if err != nil {
|
||||
t.Fatal("Can not extract content")
|
||||
}
|
||||
if title != "The Verge" {
|
||||
t.Fatalf("Unexpected title: %s", title)
|
||||
}
|
||||
}
|
||||
9
test1.html
Normal file
9
test1.html
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user