Unverified commit 1e688b2a, authored by memoclaw and committed by GitHub

feat: extract title from first H1 heading into memo property (#5726)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent b8e9ee2b
...@@ -138,6 +138,26 @@ func (s *service) ExtractTags(content []byte) ([]string, error) { ...@@ -138,6 +138,26 @@ func (s *service) ExtractTags(content []byte) ([]string, error) {
return uniquePreserveCase(tags), nil return uniquePreserveCase(tags), nil
} }
// extractHeadingText returns the plain-text content of a heading node.
//
// Inline wrappers (emphasis, strong, and so on) are flattened to the text
// they contain. The result is trimmed of leading and trailing whitespace so
// that a line break at the edge of the heading never leaks into the title.
//
// n is the heading node and source is the document the node was parsed from.
func extractHeadingText(n gast.Node, source []byte) string {
	var buf strings.Builder
	for child := n.FirstChild(); child != nil; child = child.NextSibling() {
		extractTextFromNode(child, source, &buf)
	}
	return strings.TrimSpace(buf.String())
}

// extractTextFromNode recursively appends the plain text of n and its
// descendants to buf.
//
// A text node contributes the bytes of its source segment. When the text node
// ends in a soft or hard line break (as in a multi-line setext heading), a
// single space is appended as well; without it the last word of one line and
// the first word of the next would be fused together ("Foo\nbar" -> "Foobar").
// Any other node contributes only what its children contribute.
func extractTextFromNode(n gast.Node, source []byte, buf *strings.Builder) {
	if textNode, ok := n.(*gast.Text); ok {
		buf.Write(textNode.Segment.Value(source))
		if textNode.SoftLineBreak() || textNode.HardLineBreak() {
			// The line break itself is not part of the segment; keep the
			// word boundary it represented.
			buf.WriteByte(' ')
		}
		return
	}
	for child := n.FirstChild(); child != nil; child = child.NextSibling() {
		extractTextFromNode(child, source, buf)
	}
}
// ExtractProperties computes boolean properties about the content. // ExtractProperties computes boolean properties about the content.
func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error) { func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error) {
root, err := s.parse(content) root, err := s.parse(content)
...@@ -146,12 +166,21 @@ func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Proper ...@@ -146,12 +166,21 @@ func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Proper
} }
prop := &storepb.MemoPayload_Property{} prop := &storepb.MemoPayload_Property{}
firstBlockChecked := false
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) { err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
if !entering { if !entering {
return gast.WalkContinue, nil return gast.WalkContinue, nil
} }
// Check if the first block-level child of the document is an H1 heading.
if !firstBlockChecked && n.Parent() != nil && n.Parent().Kind() == gast.KindDocument {
firstBlockChecked = true
if heading, ok := n.(*gast.Heading); ok && heading.Level == 1 {
prop.Title = extractHeadingText(n, content)
}
}
switch n.Kind() { switch n.Kind() {
case gast.KindLink: case gast.KindLink:
prop.HasLink = true prop.HasLink = true
...@@ -302,6 +331,8 @@ func (s *service) ExtractAll(content []byte) (*ExtractedData, error) { ...@@ -302,6 +331,8 @@ func (s *service) ExtractAll(content []byte) (*ExtractedData, error) {
Property: &storepb.MemoPayload_Property{}, Property: &storepb.MemoPayload_Property{},
} }
firstBlockChecked := false
// Single walk to collect all data // Single walk to collect all data
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) { err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
if !entering { if !entering {
...@@ -313,6 +344,14 @@ func (s *service) ExtractAll(content []byte) (*ExtractedData, error) { ...@@ -313,6 +344,14 @@ func (s *service) ExtractAll(content []byte) (*ExtractedData, error) {
data.Tags = append(data.Tags, string(tagNode.Tag)) data.Tags = append(data.Tags, string(tagNode.Tag))
} }
// Check if the first block-level child of the document is an H1 heading.
if !firstBlockChecked && n.Parent() != nil && n.Parent().Kind() == gast.KindDocument {
firstBlockChecked = true
if heading, ok := n.(*gast.Heading); ok && heading.Level == 1 {
data.Property.Title = extractHeadingText(n, content)
}
}
// Extract properties based on node kind // Extract properties based on node kind
switch n.Kind() { switch n.Kind() {
case gast.KindLink: case gast.KindLink:
......
...@@ -190,6 +190,7 @@ func TestExtractProperties(t *testing.T) { ...@@ -190,6 +190,7 @@ func TestExtractProperties(t *testing.T) {
hasCode bool hasCode bool
hasTasks bool hasTasks bool
hasInc bool hasInc bool
title string
}{ }{
{ {
name: "plain text", name: "plain text",
...@@ -198,6 +199,7 @@ func TestExtractProperties(t *testing.T) { ...@@ -198,6 +199,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: false, hasCode: false,
hasTasks: false, hasTasks: false,
hasInc: false, hasInc: false,
title: "",
}, },
{ {
name: "with link", name: "with link",
...@@ -206,6 +208,7 @@ func TestExtractProperties(t *testing.T) { ...@@ -206,6 +208,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: false, hasCode: false,
hasTasks: false, hasTasks: false,
hasInc: false, hasInc: false,
title: "",
}, },
{ {
name: "with inline code", name: "with inline code",
...@@ -214,6 +217,7 @@ func TestExtractProperties(t *testing.T) { ...@@ -214,6 +217,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: true, hasCode: true,
hasTasks: false, hasTasks: false,
hasInc: false, hasInc: false,
title: "",
}, },
{ {
name: "with code block", name: "with code block",
...@@ -222,6 +226,7 @@ func TestExtractProperties(t *testing.T) { ...@@ -222,6 +226,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: true, hasCode: true,
hasTasks: false, hasTasks: false,
hasInc: false, hasInc: false,
title: "",
}, },
{ {
name: "with completed task", name: "with completed task",
...@@ -230,6 +235,7 @@ func TestExtractProperties(t *testing.T) { ...@@ -230,6 +235,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: false, hasCode: false,
hasTasks: true, hasTasks: true,
hasInc: false, hasInc: false,
title: "",
}, },
{ {
name: "with incomplete task", name: "with incomplete task",
...@@ -238,6 +244,7 @@ func TestExtractProperties(t *testing.T) { ...@@ -238,6 +244,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: false, hasCode: false,
hasTasks: true, hasTasks: true,
hasInc: true, hasInc: true,
title: "",
}, },
{ {
name: "mixed tasks", name: "mixed tasks",
...@@ -246,6 +253,7 @@ func TestExtractProperties(t *testing.T) { ...@@ -246,6 +253,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: false, hasCode: false,
hasTasks: true, hasTasks: true,
hasInc: true, hasInc: true,
title: "",
}, },
{ {
name: "everything", name: "everything",
...@@ -254,6 +262,32 @@ func TestExtractProperties(t *testing.T) { ...@@ -254,6 +262,32 @@ func TestExtractProperties(t *testing.T) {
hasCode: true, hasCode: true,
hasTasks: true, hasTasks: true,
hasInc: true, hasInc: true,
title: "Title",
},
{
name: "h1 as first node extracts title",
content: "# My Article Title\n\nBody text here.",
title: "My Article Title",
},
{
name: "h2 as first node does not extract title",
content: "## Sub Heading\n\nBody text.",
title: "",
},
{
name: "h1 not first node does not extract title",
content: "Some text\n\n# Heading Later",
title: "",
},
{
name: "h1 with inline formatting extracts plain text",
content: "# Title with **bold** and *italic*\n\nBody.",
title: "Title with bold and italic",
},
{
name: "empty content has no title",
content: "",
title: "",
}, },
} }
...@@ -267,6 +301,41 @@ func TestExtractProperties(t *testing.T) { ...@@ -267,6 +301,41 @@ func TestExtractProperties(t *testing.T) {
assert.Equal(t, tt.hasCode, props.HasCode, "HasCode") assert.Equal(t, tt.hasCode, props.HasCode, "HasCode")
assert.Equal(t, tt.hasTasks, props.HasTaskList, "HasTaskList") assert.Equal(t, tt.hasTasks, props.HasTaskList, "HasTaskList")
assert.Equal(t, tt.hasInc, props.HasIncompleteTasks, "HasIncompleteTasks") assert.Equal(t, tt.hasInc, props.HasIncompleteTasks, "HasIncompleteTasks")
assert.Equal(t, tt.title, props.Title, "Title")
})
}
}
func TestExtractAllTitle(t *testing.T) {
svc := NewService(WithTagExtension())
tests := []struct {
name string
content string
title string
}{
{
name: "h1 first node",
content: "# Article Title\n\nContent with #tag",
title: "Article Title",
},
{
name: "no h1",
content: "Just text with #tag",
title: "",
},
{
name: "h1 not first",
content: "Intro\n\n# Late Heading",
title: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
data, err := svc.ExtractAll([]byte(tt.content))
require.NoError(t, err)
assert.Equal(t, tt.title, data.Property.Title, "Title")
}) })
} }
} }
......
...@@ -227,6 +227,8 @@ message Memo { ...@@ -227,6 +227,8 @@ message Memo {
bool has_task_list = 2; bool has_task_list = 2;
bool has_code = 3; bool has_code = 3;
bool has_incomplete_tasks = 4; bool has_incomplete_tasks = 4;
// The title extracted from the first H1 heading, if present.
string title = 5;
} }
} }
......
...@@ -1679,8 +1679,10 @@ type Memo_Property struct { ...@@ -1679,8 +1679,10 @@ type Memo_Property struct {
HasTaskList bool `protobuf:"varint,2,opt,name=has_task_list,json=hasTaskList,proto3" json:"has_task_list,omitempty"` HasTaskList bool `protobuf:"varint,2,opt,name=has_task_list,json=hasTaskList,proto3" json:"has_task_list,omitempty"`
HasCode bool `protobuf:"varint,3,opt,name=has_code,json=hasCode,proto3" json:"has_code,omitempty"` HasCode bool `protobuf:"varint,3,opt,name=has_code,json=hasCode,proto3" json:"has_code,omitempty"`
HasIncompleteTasks bool `protobuf:"varint,4,opt,name=has_incomplete_tasks,json=hasIncompleteTasks,proto3" json:"has_incomplete_tasks,omitempty"` HasIncompleteTasks bool `protobuf:"varint,4,opt,name=has_incomplete_tasks,json=hasIncompleteTasks,proto3" json:"has_incomplete_tasks,omitempty"`
unknownFields protoimpl.UnknownFields // The title extracted from the first H1 heading, if present.
sizeCache protoimpl.SizeCache Title string `protobuf:"bytes,5,opt,name=title,proto3" json:"title,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
} }
func (x *Memo_Property) Reset() { func (x *Memo_Property) Reset() {
...@@ -1741,6 +1743,13 @@ func (x *Memo_Property) GetHasIncompleteTasks() bool { ...@@ -1741,6 +1743,13 @@ func (x *Memo_Property) GetHasIncompleteTasks() bool {
return false return false
} }
// GetTitle returns the Title field, or the empty string when the receiver is
// nil, so it is safe to call on a nil *Memo_Property.
func (x *Memo_Property) GetTitle() string {
	if x == nil {
		return ""
	}
	return x.Title
}
// Memo reference in relations. // Memo reference in relations.
type MemoRelation_Memo struct { type MemoRelation_Memo struct {
state protoimpl.MessageState `protogen:"open.v1"` state protoimpl.MessageState `protogen:"open.v1"`
...@@ -1812,7 +1821,7 @@ const file_api_v1_memo_service_proto_rawDesc = "" + ...@@ -1812,7 +1821,7 @@ const file_api_v1_memo_service_proto_rawDesc = "" +
"\rreaction_type\x18\x04 \x01(\tB\x03\xe0A\x02R\freactionType\x12@\n" + "\rreaction_type\x18\x04 \x01(\tB\x03\xe0A\x02R\freactionType\x12@\n" +
"\vcreate_time\x18\x05 \x01(\v2\x1a.google.protobuf.TimestampB\x03\xe0A\x03R\n" + "\vcreate_time\x18\x05 \x01(\v2\x1a.google.protobuf.TimestampB\x03\xe0A\x03R\n" +
"createTime:X\xeaAU\n" + "createTime:X\xeaAU\n" +
"\x15memos.api.v1/Reaction\x12!memos/{memo}/reactions/{reaction}\x1a\x04name*\treactions2\breaction\"\xd8\b\n" + "\x15memos.api.v1/Reaction\x12!memos/{memo}/reactions/{reaction}\x1a\x04name*\treactions2\breaction\"\xee\b\n" +
"\x04Memo\x12\x17\n" + "\x04Memo\x12\x17\n" +
"\x04name\x18\x01 \x01(\tB\x03\xe0A\bR\x04name\x12.\n" + "\x04name\x18\x01 \x01(\tB\x03\xe0A\bR\x04name\x12.\n" +
"\x05state\x18\x02 \x01(\x0e2\x13.memos.api.v1.StateB\x03\xe0A\x02R\x05state\x123\n" + "\x05state\x18\x02 \x01(\x0e2\x13.memos.api.v1.StateB\x03\xe0A\x02R\x05state\x123\n" +
...@@ -1837,12 +1846,13 @@ const file_api_v1_memo_service_proto_rawDesc = "" + ...@@ -1837,12 +1846,13 @@ const file_api_v1_memo_service_proto_rawDesc = "" +
"\x06parent\x18\x10 \x01(\tB\x19\xe0A\x03\xfaA\x13\n" + "\x06parent\x18\x10 \x01(\tB\x19\xe0A\x03\xfaA\x13\n" +
"\x11memos.api.v1/MemoH\x00R\x06parent\x88\x01\x01\x12\x1d\n" + "\x11memos.api.v1/MemoH\x00R\x06parent\x88\x01\x01\x12\x1d\n" +
"\asnippet\x18\x11 \x01(\tB\x03\xe0A\x03R\asnippet\x12<\n" + "\asnippet\x18\x11 \x01(\tB\x03\xe0A\x03R\asnippet\x12<\n" +
"\blocation\x18\x12 \x01(\v2\x16.memos.api.v1.LocationB\x03\xe0A\x01H\x01R\blocation\x88\x01\x01\x1a\x96\x01\n" + "\blocation\x18\x12 \x01(\v2\x16.memos.api.v1.LocationB\x03\xe0A\x01H\x01R\blocation\x88\x01\x01\x1a\xac\x01\n" +
"\bProperty\x12\x19\n" + "\bProperty\x12\x19\n" +
"\bhas_link\x18\x01 \x01(\bR\ahasLink\x12\"\n" + "\bhas_link\x18\x01 \x01(\bR\ahasLink\x12\"\n" +
"\rhas_task_list\x18\x02 \x01(\bR\vhasTaskList\x12\x19\n" + "\rhas_task_list\x18\x02 \x01(\bR\vhasTaskList\x12\x19\n" +
"\bhas_code\x18\x03 \x01(\bR\ahasCode\x120\n" + "\bhas_code\x18\x03 \x01(\bR\ahasCode\x120\n" +
"\x14has_incomplete_tasks\x18\x04 \x01(\bR\x12hasIncompleteTasks:7\xeaA4\n" + "\x14has_incomplete_tasks\x18\x04 \x01(\bR\x12hasIncompleteTasks\x12\x14\n" +
"\x05title\x18\x05 \x01(\tR\x05title:7\xeaA4\n" +
"\x11memos.api.v1/Memo\x12\fmemos/{memo}\x1a\x04name*\x05memos2\x04memoB\t\n" + "\x11memos.api.v1/Memo\x12\fmemos/{memo}\x1a\x04name*\x05memos2\x04memoB\t\n" +
"\a_parentB\v\n" + "\a_parentB\v\n" +
"\t_location\"u\n" + "\t_location\"u\n" +
......
...@@ -2590,6 +2590,9 @@ components: ...@@ -2590,6 +2590,9 @@ components:
type: boolean type: boolean
hasIncompleteTasks: hasIncompleteTasks:
type: boolean type: boolean
title:
type: string
description: The title extracted from the first H1 heading, if present.
description: Computed properties of a memo. description: Computed properties of a memo.
OAuth2Config: OAuth2Config:
type: object type: object
......
...@@ -88,8 +88,10 @@ type MemoPayload_Property struct { ...@@ -88,8 +88,10 @@ type MemoPayload_Property struct {
HasTaskList bool `protobuf:"varint,2,opt,name=has_task_list,json=hasTaskList,proto3" json:"has_task_list,omitempty"` HasTaskList bool `protobuf:"varint,2,opt,name=has_task_list,json=hasTaskList,proto3" json:"has_task_list,omitempty"`
HasCode bool `protobuf:"varint,3,opt,name=has_code,json=hasCode,proto3" json:"has_code,omitempty"` HasCode bool `protobuf:"varint,3,opt,name=has_code,json=hasCode,proto3" json:"has_code,omitempty"`
HasIncompleteTasks bool `protobuf:"varint,4,opt,name=has_incomplete_tasks,json=hasIncompleteTasks,proto3" json:"has_incomplete_tasks,omitempty"` HasIncompleteTasks bool `protobuf:"varint,4,opt,name=has_incomplete_tasks,json=hasIncompleteTasks,proto3" json:"has_incomplete_tasks,omitempty"`
unknownFields protoimpl.UnknownFields // The title extracted from the first H1 heading, if present.
sizeCache protoimpl.SizeCache Title string `protobuf:"bytes,5,opt,name=title,proto3" json:"title,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
} }
func (x *MemoPayload_Property) Reset() { func (x *MemoPayload_Property) Reset() {
...@@ -150,6 +152,13 @@ func (x *MemoPayload_Property) GetHasIncompleteTasks() bool { ...@@ -150,6 +152,13 @@ func (x *MemoPayload_Property) GetHasIncompleteTasks() bool {
return false return false
} }
// GetTitle returns the Title field, or the empty string when the receiver is
// nil, so it is safe to call on a nil *MemoPayload_Property.
func (x *MemoPayload_Property) GetTitle() string {
	if x == nil {
		return ""
	}
	return x.Title
}
type MemoPayload_Location struct { type MemoPayload_Location struct {
state protoimpl.MessageState `protogen:"open.v1"` state protoimpl.MessageState `protogen:"open.v1"`
Placeholder string `protobuf:"bytes,1,opt,name=placeholder,proto3" json:"placeholder,omitempty"` Placeholder string `protobuf:"bytes,1,opt,name=placeholder,proto3" json:"placeholder,omitempty"`
...@@ -214,16 +223,17 @@ var File_store_memo_proto protoreflect.FileDescriptor ...@@ -214,16 +223,17 @@ var File_store_memo_proto protoreflect.FileDescriptor
const file_store_memo_proto_rawDesc = "" + const file_store_memo_proto_rawDesc = "" +
"\n" + "\n" +
"\x10store/memo.proto\x12\vmemos.store\"\xa0\x03\n" + "\x10store/memo.proto\x12\vmemos.store\"\xb6\x03\n" +
"\vMemoPayload\x12=\n" + "\vMemoPayload\x12=\n" +
"\bproperty\x18\x01 \x01(\v2!.memos.store.MemoPayload.PropertyR\bproperty\x12=\n" + "\bproperty\x18\x01 \x01(\v2!.memos.store.MemoPayload.PropertyR\bproperty\x12=\n" +
"\blocation\x18\x02 \x01(\v2!.memos.store.MemoPayload.LocationR\blocation\x12\x12\n" + "\blocation\x18\x02 \x01(\v2!.memos.store.MemoPayload.LocationR\blocation\x12\x12\n" +
"\x04tags\x18\x03 \x03(\tR\x04tags\x1a\x96\x01\n" + "\x04tags\x18\x03 \x03(\tR\x04tags\x1a\xac\x01\n" +
"\bProperty\x12\x19\n" + "\bProperty\x12\x19\n" +
"\bhas_link\x18\x01 \x01(\bR\ahasLink\x12\"\n" + "\bhas_link\x18\x01 \x01(\bR\ahasLink\x12\"\n" +
"\rhas_task_list\x18\x02 \x01(\bR\vhasTaskList\x12\x19\n" + "\rhas_task_list\x18\x02 \x01(\bR\vhasTaskList\x12\x19\n" +
"\bhas_code\x18\x03 \x01(\bR\ahasCode\x120\n" + "\bhas_code\x18\x03 \x01(\bR\ahasCode\x120\n" +
"\x14has_incomplete_tasks\x18\x04 \x01(\bR\x12hasIncompleteTasks\x1af\n" + "\x14has_incomplete_tasks\x18\x04 \x01(\bR\x12hasIncompleteTasks\x12\x14\n" +
"\x05title\x18\x05 \x01(\tR\x05title\x1af\n" +
"\bLocation\x12 \n" + "\bLocation\x12 \n" +
"\vplaceholder\x18\x01 \x01(\tR\vplaceholder\x12\x1a\n" + "\vplaceholder\x18\x01 \x01(\tR\vplaceholder\x12\x1a\n" +
"\blatitude\x18\x02 \x01(\x01R\blatitude\x12\x1c\n" + "\blatitude\x18\x02 \x01(\x01R\blatitude\x12\x1c\n" +
......
...@@ -17,6 +17,8 @@ message MemoPayload { ...@@ -17,6 +17,8 @@ message MemoPayload {
bool has_task_list = 2; bool has_task_list = 2;
bool has_code = 3; bool has_code = 3;
bool has_incomplete_tasks = 4; bool has_incomplete_tasks = 4;
// The title extracted from the first H1 heading, if present.
string title = 5;
} }
message Location { message Location {
......
...@@ -192,6 +192,7 @@ func convertMemoPropertyFromStore(property *storepb.MemoPayload_Property) *v1pb. ...@@ -192,6 +192,7 @@ func convertMemoPropertyFromStore(property *storepb.MemoPayload_Property) *v1pb.
HasTaskList: property.HasTaskList, HasTaskList: property.HasTaskList,
HasCode: property.HasCode, HasCode: property.HasCode,
HasIncompleteTasks: property.HasIncompleteTasks, HasIncompleteTasks: property.HasIncompleteTasks,
Title: property.Title,
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment