Fixes: https://todo.code.netlandish.com/~netlandish/links/96
Signed-off-by: Peter Sanchez <peter@netlandish.com>
---
core/import.go | 5 +++--
helpers.go | 21 +++++++++++++++++++++
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/core/import.go b/core/import.go
index 1671f47..ae8e14e 100644
--- a/core/import.go
+++ b/core/import.go
@@ -114,6 +114,7 @@ func (p pinBoardObj) GetTags() []string {
func trimTags(tags []string) []string {
var ret []string
for _, t := range tags {
+ t = links.SanitizeUTF8(t)
if len(t) > 50 {
t = t[:50]
}
@@ -300,9 +301,9 @@ func processOrgLinks(obj importObj, baseURLMap map[string]int,
title = title[:146] + "..."
}
return &models.OrgLink{
- Title: title,
+ Title: links.SanitizeUTF8(title),
URL: obj.GetURL(),
- Description: obj.GetDescription(),
+ Description: links.SanitizeUTF8(obj.GetDescription()),
BaseURLID: sql.NullInt64{Valid: true, Int64: int64(baseID)},
OrgID: org.ID,
UserID: int(user.ID),
diff --git a/helpers.go b/helpers.go
index 36a6eb7..125698a 100644
--- a/helpers.go
+++ b/helpers.go
@@ -23,6 +23,7 @@ import (
"strconv"
"strings"
"time"
+ "unicode/utf8"
"git.sr.ht/~emersion/gqlclient"
"github.com/99designs/gqlgen/graphql"
@@ -1159,3 +1160,23 @@ func IPForContext(ctx context.Context) string {
}
return ip
}
+
+// SanitizeUTF8 returns input unchanged when it is valid UTF-8; otherwise every invalid byte is replaced with a space.
+func SanitizeUTF8(input string) string {
+ if utf8.ValidString(input) {
+ return input
+ }
+
+ var b strings.Builder
+ for i := 0; i < len(input); {
+ r, size := utf8.DecodeRuneInString(input[i:])
+ if r == utf8.RuneError && size == 1 {
+ // size == 1 means a byte that failed to decode (a literal U+FFFD decodes with size 3); substitute a space
+ b.WriteString(" ")
+ } else {
+ b.WriteRune(r)
+ }
+ i += size
+ }
+ return b.String()
+}
--
2.47.2