~netlandish/links-dev

links: Add utf8 sanitizer to address edge case import errors. v1 APPLIED

Peter Sanchez: 1
 Add utf8 sanitizer to address edge case import errors.

 2 files changed, 24 insertions(+), 2 deletions(-)
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.code.netlandish.com/~netlandish/links-dev/patches/102/mbox | git am -3
Learn more about email & git

[PATCH links] Add utf8 sanitizer to address edge case import errors. Export this patch

Fixes: https://todo.code.netlandish.com/~netlandish/links/96
Signed-off-by: Peter Sanchez <peter@netlandish.com>
---
 core/import.go |  5 +++--
 helpers.go     | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/core/import.go b/core/import.go
index 1671f47..ae8e14e 100644
--- a/core/import.go
+++ b/core/import.go
@@ -114,6 +114,7 @@ func (p pinBoardObj) GetTags() []string {
func trimTags(tags []string) []string {
	var ret []string
	for _, t := range tags {
		t = links.SanitizeUTF8(t)
		if len(t) > 50 {
			t = t[:50]
		}
@@ -300,9 +301,9 @@ func processOrgLinks(obj importObj, baseURLMap map[string]int,
		title = title[:146] + "..."
	}
	return &models.OrgLink{
-		Title:       title,
+		Title:       links.SanitizeUTF8(title),
		URL:         obj.GetURL(),
-		Description: obj.GetDescription(),
+		Description: links.SanitizeUTF8(obj.GetDescription()),
		BaseURLID:   sql.NullInt64{Valid: true, Int64: int64(baseID)},
		OrgID:       org.ID,
		UserID:      int(user.ID),
diff --git a/helpers.go b/helpers.go
index 36a6eb7..125698a 100644
--- a/helpers.go
+++ b/helpers.go
@@ -23,6 +23,7 @@ import (
	"strconv"
	"strings"
	"time"
	"unicode/utf8"

	"git.sr.ht/~emersion/gqlclient"
	"github.com/99designs/gqlgen/graphql"
@@ -1159,3 +1160,23 @@ func IPForContext(ctx context.Context) string {
	}
	return ip
}

// SanitizeUTF8 returns input with every byte that is not part of a valid
// UTF-8 sequence replaced by a single space. Input that is already valid
// UTF-8 is returned unchanged.
func SanitizeUTF8(input string) string {
	if utf8.ValidString(input) {
		return input
	}

	var out strings.Builder
	for rest := input; len(rest) > 0; {
		r, width := utf8.DecodeRuneInString(rest)
		rest = rest[width:]

		// A RuneError of width 1 marks an undecodable byte; a RuneError of
		// width 3 is a legitimately encoded U+FFFD and must be kept.
		if r == utf8.RuneError && width == 1 {
			out.WriteString(" ")
			continue
		}
		out.WriteRune(r)
	}
	return out.String()
}
-- 
2.47.2
Merged.

To git@git.code.netlandish.com:~netlandish/links
   864c750..ad6d7d7  master -> master