Unverified Commit 2d49e96a authored by boojack's avatar boojack Committed by GitHub

feat: get image blob in backend (#495)

* feat: get image blob in backend

* chore: update
parent 9036bd47
package crawler package getter
import ( import (
"fmt"
"io" "io"
"net/http" "net/http"
urlUtil "net/url" "net/url"
"golang.org/x/net/html" "golang.org/x/net/html"
"golang.org/x/net/html/atom" "golang.org/x/net/html/atom"
...@@ -15,19 +16,26 @@ type HTMLMeta struct { ...@@ -15,19 +16,26 @@ type HTMLMeta struct {
Image string `json:"image"` Image string `json:"image"`
} }
func GetWebsiteMeta(url string) (*HTMLMeta, error) { func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
if _, err := urlUtil.Parse(url); err != nil { if _, err := url.Parse(urlStr); err != nil {
return nil, err return nil, err
} }
response, err := http.Get(url) response, err := http.Get(urlStr)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer response.Body.Close() defer response.Body.Close()
htmlMeta := extractHTMLMeta(response.Body) mediatype, err := getMediatype(response)
if err != nil {
return nil, err
}
if mediatype != "text/html" {
return nil, fmt.Errorf("Wrong website mediatype")
}
htmlMeta := extractHTMLMeta(response.Body)
return htmlMeta, nil return htmlMeta, nil
} }
......
package crawler package getter
import ( import (
"testing" "testing"
...@@ -6,19 +6,19 @@ import ( ...@@ -6,19 +6,19 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
func TestGetWebsiteMeta(t *testing.T) { func TestGetHTMLMeta(t *testing.T) {
tests := []struct { tests := []struct {
url string urlStr string
htmlMeta HTMLMeta htmlMeta HTMLMeta
}{ }{
{ {
url: "https://baidu.com", urlStr: "https://baidu.com",
htmlMeta: HTMLMeta{ htmlMeta: HTMLMeta{
Title: "百度一下,你就知道", Title: "百度一下,你就知道",
}, },
}, },
{ {
url: "https://www.bytebase.com/blog/sql-review-tool-for-devs", urlStr: "https://www.bytebase.com/blog/sql-review-tool-for-devs",
htmlMeta: HTMLMeta{ htmlMeta: HTMLMeta{
Title: "The SQL Review Tool for Developers", Title: "The SQL Review Tool for Developers",
Description: "Reviewing SQL can be somewhat tedious, yet is essential to keep your database fleet reliable. At Bytebase, we are building a developer-first SQL review tool to empower the DevOps system.", Description: "Reviewing SQL can be somewhat tedious, yet is essential to keep your database fleet reliable. At Bytebase, we are building a developer-first SQL review tool to empower the DevOps system.",
...@@ -27,7 +27,7 @@ func TestGetWebsiteMeta(t *testing.T) { ...@@ -27,7 +27,7 @@ func TestGetWebsiteMeta(t *testing.T) {
}, },
} }
for _, test := range tests { for _, test := range tests {
metadata, err := GetWebsiteMeta(test.url) metadata, err := GetHTMLMeta(test.urlStr)
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, test.htmlMeta, *metadata) require.Equal(t, test.htmlMeta, *metadata)
} }
......
// crawler is using to get resources from url. // getter is using to get resources from url.
// * Get metadata for website; // * Get metadata for website;
// * Get image blob to avoid CORS; // * Get image blob to avoid CORS;
package crawler package getter
package getter
import (
"fmt"
"io"
"net/http"
"net/url"
"strings"
)
// Image is a fetched remote image: its raw bytes together with the
// media type (e.g. "image/png") reported by the origin server.
type Image struct {
	Blob      []byte
	Mediatype string
}

// GetImage downloads the resource at urlStr and returns it as an Image.
//
// It returns an error when the URL is malformed, the HTTP request fails,
// the server does not answer with a 2xx status, the Content-Type header
// cannot be parsed, or the response is not an image.
func GetImage(urlStr string) (*Image, error) {
	if _, err := url.Parse(urlStr); err != nil {
		return nil, err
	}
	response, err := http.Get(urlStr)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()
	// Reject non-success responses so an error page served with an image
	// content type is never treated as a valid image.
	if response.StatusCode < http.StatusOK || response.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("unexpected status %d fetching image", response.StatusCode)
	}
	mediatype, err := getMediatype(response)
	if err != nil {
		return nil, err
	}
	if !strings.HasPrefix(mediatype, "image/") {
		// Go convention: error strings are lowercase; include the actual
		// mediatype so callers can see what the server returned.
		return nil, fmt.Errorf("wrong image mediatype %q", mediatype)
	}
	bodyBytes, err := io.ReadAll(response.Body)
	if err != nil {
		return nil, err
	}
	image := &Image{
		Blob:      bodyBytes,
		Mediatype: mediatype,
	}
	return image, nil
}
package getter
import (
	"net/http"
	"net/http/httptest"
	"testing"

	"github.com/stretchr/testify/require"
)
// TestGetImage verifies GetImage against a local test server so the test
// is hermetic and does not depend on external network availability.
func TestGetImage(t *testing.T) {
	blob := []byte{0x89, 0x50, 0x4e, 0x47}
	imageServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "image/png")
		if _, err := w.Write(blob); err != nil {
			t.Error(err)
		}
	}))
	defer imageServer.Close()

	image, err := GetImage(imageServer.URL)
	require.NoError(t, err)
	require.Equal(t, "image/png", image.Mediatype)
	require.Equal(t, blob, image.Blob)

	// Non-image responses must be rejected.
	textServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		_, _ = w.Write([]byte("not an image"))
	}))
	defer textServer.Close()

	_, err = GetImage(textServer.URL)
	require.Error(t, err)
}
package getter
import (
"mime"
"net/http"
)
func getMediatype(response *http.Response) (string, error) {
contentType := response.Header.Get("content-type")
mediatype, _, err := mime.ParseMediaType(contentType)
if err != nil {
return "", err
}
return mediatype, nil
}
...@@ -4,28 +4,32 @@ import ( ...@@ -4,28 +4,32 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"net/http" "net/http"
"net/url"
"github.com/labstack/echo/v4" "github.com/labstack/echo/v4"
"github.com/usememos/memos/plugin/crawler" getter "github.com/usememos/memos/plugin/http_getter"
metric "github.com/usememos/memos/plugin/metrics" metric "github.com/usememos/memos/plugin/metrics"
) )
func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) { func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) {
g.GET("/crawler/website", func(c echo.Context) error { g.GET("/get/httpmeta", func(c echo.Context) error {
ctx := c.Request().Context() ctx := c.Request().Context()
url := c.QueryParam("url") urlStr := c.QueryParam("url")
if url == "" { if urlStr == "" {
return echo.NewHTTPError(http.StatusBadRequest, "Missing website url") return echo.NewHTTPError(http.StatusBadRequest, "Missing website url")
} }
if _, err := url.Parse(urlStr); err != nil {
return echo.NewHTTPError(http.StatusBadRequest, "Wrong url").SetInternal(err)
}
htmlMeta, err := crawler.GetWebsiteMeta(url) htmlMeta, err := getter.GetHTMLMeta(urlStr)
if err != nil { if err != nil {
return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get website meta with url: %s", url)).SetInternal(err) return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get website meta with url: %s", urlStr)).SetInternal(err)
} }
s.Collector.Collect(ctx, &metric.Metric{ s.Collector.Collect(ctx, &metric.Metric{
Name: "crawler used", Name: "getter used",
Labels: map[string]string{ Labels: map[string]string{
"type": "website", "type": "httpmeta",
}, },
}) })
...@@ -35,4 +39,32 @@ func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) { ...@@ -35,4 +39,32 @@ func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) {
} }
return nil return nil
}) })
g.GET("/get/image", func(c echo.Context) error {
ctx := c.Request().Context()
urlStr := c.QueryParam("url")
if urlStr == "" {
return echo.NewHTTPError(http.StatusBadRequest, "Missing image url")
}
if _, err := url.Parse(urlStr); err != nil {
return echo.NewHTTPError(http.StatusBadRequest, "Wrong url").SetInternal(err)
}
image, err := getter.GetImage(urlStr)
if err != nil {
return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get image url: %s", urlStr)).SetInternal(err)
}
s.Collector.Collect(ctx, &metric.Metric{
Name: "getter used",
Labels: map[string]string{
"type": "image",
},
})
c.Response().Writer.WriteHeader(http.StatusOK)
c.Response().Writer.Header().Set("Content-Type", image.Mediatype)
if _, err := c.Response().Writer.Write(image.Blob); err != nil {
return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write image blob").SetInternal(err)
}
return nil
})
} }
...@@ -161,7 +161,6 @@ func (s *Server) registerResourceRoutes(g *echo.Group) { ...@@ -161,7 +161,6 @@ func (s *Server) registerResourceRoutes(g *echo.Group) {
if _, err := c.Response().Writer.Write(resource.Blob); err != nil { if _, err := c.Response().Writer.Write(resource.Blob); err != nil {
return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write resource blob").SetInternal(err) return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write resource blob").SetInternal(err)
} }
return nil return nil
}) })
......
...@@ -8,7 +8,8 @@ const renderer = (rawStr: string): string => { ...@@ -8,7 +8,8 @@ const renderer = (rawStr: string): string => {
return rawStr; return rawStr;
} }
return `<img class='img' src='${escape(matchResult[1])}' />`; // NOTE: Get image blob from backend to avoid CORS.
return `<img class='img' src='/o/get/image?url=${escape(matchResult[1])}' />`;
}; };
export default { export default {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment