Only extract the first element as title
This commit is contained in:
@@ -7,7 +7,6 @@ package readability
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"golang.org/x/text/encoding/htmlindex"
|
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"math"
|
"math"
|
||||||
@@ -15,6 +14,8 @@ import (
|
|||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/text/encoding/htmlindex"
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
)
|
)
|
||||||
@@ -129,7 +130,7 @@ func readDocumentWithEncoding(content []byte) (*goquery.Document, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func getTitle(document *goquery.Document) string {
|
func getTitle(document *goquery.Document) string {
|
||||||
title := document.Find("title").Text()
|
title := document.Find("title").First().Text()
|
||||||
return title
|
return title
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
25
readability_test.go
Normal file
25
readability_test.go
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
package readability_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.kopis.de/carsten/readability"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFullHtml(t *testing.T) {
|
||||||
|
b, err := os.ReadFile("test1.html")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal("Can not read file")
|
||||||
|
}
|
||||||
|
|
||||||
|
reader := bytes.NewReader(b)
|
||||||
|
title, _, err := readability.ExtractContent(".", reader)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal("Can not extract content")
|
||||||
|
}
|
||||||
|
if title != "The Verge" {
|
||||||
|
t.Fatalf("Unexpected title: %s", title)
|
||||||
|
}
|
||||||
|
}
|
||||||
9
test1.html
Normal file
9
test1.html
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user