~netlandish/links-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
1

[PATCH links] bug: when same tag is given twice postgresql errors due to `ON CONFLICT` collision. This dedupes tag processing.

Details
Message ID
<20260421124112.32167-1-peter@netlandish.com>
Sender timestamp
1776753668
DKIM signature
missing
Download raw message
Patch: +211 -5
---
 api/api_test.go           | 187 ++++++++++++++++++++++++++++++++++++++
 helpers.go                |  11 ++-
 models/tag_link_shorts.go |   9 +-
 models/tag_links.go       |   9 +-
 4 files changed, 211 insertions(+), 5 deletions(-)

diff --git a/api/api_test.go b/api/api_test.go
index edf5ab6..ec779dd 100644
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -3579,6 +3579,193 @@ func TestAPI(t *testing.T) {
		c.Equal(0, len(tagLinksAAfter), "Link should have 0 tags after update with empty tags")
	})

	t.Run("addLink dedupes tags that resolve to the same slug", func(t *testing.T) {
		c := require.New(t)
		type AddLinkResponse struct {
			Link models.OrgLink `json:"addLink"`
		}

		addLinkQuery := `mutation AddLink($title: String!, $url: String!, $description: String,
						  $visibility: LinkVisibility!, $unread: Boolean!, $starred: Boolean!,
						  $archive: Boolean! $slug: String!, $tags: String) {
					addLink(input: {
								title: $title,
								url: $url,
								description: $description,
								visibility: $visibility,
								unread: $unread,
								starred: $starred,
								archive: $archive,
								orgSlug: $slug,
								tags: $tags}) {
						id
						hash
					}
				}`

		var link AddLinkResponse
		op := gqlclient.NewOperation(addLinkQuery)
		op.Var("title", "Link with duplicate slug tags on create")
		op.Var("url", "https://example.com/dupe-tags-add")
		op.Var("description", "")
		op.Var("visibility", models.OrgLinkVisibilityPrivate)
		op.Var("tags", "dupe-add, Dupe-Add, DUPE-ADD")
		op.Var("slug", "personal-org")
		op.Var("unread", false)
		op.Var("starred", false)
		op.Var("archive", false)
		err := links.Execute(ctx, op, &link)
		c.NoError(err)
		c.NotZero(link.Link.ID)

		tagLinks, err := models.GetTagLinks(dbCtx,
			&database.FilterOptions{Filter: sq.Expr("org_link_id = ?", link.Link.ID)})
		c.NoError(err)
		c.Equal(1, len(tagLinks), "duplicate-slug tags should collapse to a single tag_links row")
		c.Equal("dupe-add", tagLinks[0].Name, "first occurrence should win for the stored name")
	})

	t.Run("updateLink dedupes tags that resolve to the same slug", func(t *testing.T) {
		c := require.New(t)
		type AddLinkResponse struct {
			Link models.OrgLink `json:"addLink"`
		}
		type UpdateLinkResponse struct {
			Link models.OrgLink `json:"updateLink"`
		}

		addLinkQuery := `mutation AddLink($title: String!, $url: String!, $description: String,
						  $visibility: LinkVisibility!, $unread: Boolean!, $starred: Boolean!,
						  $archive: Boolean! $slug: String!, $tags: String) {
					addLink(input: {
								title: $title,
								url: $url,
								description: $description,
								visibility: $visibility,
								unread: $unread,
								starred: $starred,
								archive: $archive,
								orgSlug: $slug,
								tags: $tags}) {
						id
						hash
					}
				}`

		var link AddLinkResponse
		opAdd := gqlclient.NewOperation(addLinkQuery)
		opAdd.Var("title", "Link with duplicate slug tags on update")
		opAdd.Var("url", "https://example.com/dupe-tags-update")
		opAdd.Var("description", "")
		opAdd.Var("visibility", models.OrgLinkVisibilityPrivate)
		opAdd.Var("tags", "original-tag")
		opAdd.Var("slug", "personal-org")
		opAdd.Var("unread", false)
		opAdd.Var("starred", false)
		opAdd.Var("archive", false)
		err := links.Execute(ctx, opAdd, &link)
		c.NoError(err)
		c.NotZero(link.Link.ID)

		updateLinkQuery := `mutation UpdateLink($hash: String!, $title: String!,
								 $url: String!, $visibility: LinkVisibility!,
								 $tags: String) {
				updateLink(input: {
					title: $title,
					hash: $hash,
					url: $url,
					visibility: $visibility,
					tags: $tags,
				}) {id, hash}
			}`

		var updated UpdateLinkResponse
		opUpdate := gqlclient.NewOperation(updateLinkQuery)
		opUpdate.Var("hash", link.Link.Hash)
		opUpdate.Var("title", "Link with duplicate slug tags on update")
		opUpdate.Var("url", "https://example.com/dupe-tags-update")
		opUpdate.Var("visibility", models.OrgLinkVisibilityPrivate)
		opUpdate.Var("tags", "dupe-upd, Dupe-Upd, DUPE-UPD")
		err = links.Execute(ctx, opUpdate, &updated)
		c.NoError(err, "ON CONFLICT must not see the same (org_link_id, tag_id) twice")

		tagLinks, err := models.GetTagLinks(dbCtx,
			&database.FilterOptions{Filter: sq.Expr("org_link_id = ?", link.Link.ID)})
		c.NoError(err)
		c.Equal(1, len(tagLinks), "duplicate-slug tags should collapse to a single tag_links row")
		c.Equal("dupe-upd", tagLinks[0].Name, "first occurrence should win for the stored name")
	})

	t.Run("linkShort dedupes tags that resolve to the same slug", func(t *testing.T) {
		c := require.New(t)
		type AddLinkShortResponse struct {
			LinkShort models.LinkShort `json:"addLinkShort"`
		}
		type UpdateLinkShortResponse struct {
			LinkShort models.LinkShort `json:"updateLinkShort"`
		}

		var added AddLinkShortResponse
		opAdd := gqlclient.NewOperation(
			`mutation AddLinkShort($title: String!, $url: String!, $slug: String!,
						$domain: Int!, $short: String, $tags: String) {
					addLinkShort(input: {
								title: $title,
								url: $url,
								orgSlug: $slug,
								domainId: $domain,
								shortCode: $short,
								tags: $tags}) {
						id
					}
				}`)
		opAdd.Var("title", "short with dupe tags")
		opAdd.Var("url", "https://dupe-short.example.org")
		opAdd.Var("slug", "personal-org")
		opAdd.Var("domain", 1)
		opAdd.Var("short", "")
		opAdd.Var("tags", "short-dupe, Short-Dupe, SHORT-DUPE")
		err := links.Execute(ctx, opAdd, &added)
		c.NoError(err)
		c.NotZero(added.LinkShort.ID)

		tagLinks, err := models.GetTagLinkShorts(dbCtx,
			&database.FilterOptions{Filter: sq.Expr("link_short_id = ?", added.LinkShort.ID)})
		c.NoError(err)
		c.Equal(1, len(tagLinks), "duplicate-slug tags should collapse to a single tag_link_shorts row")
		c.Equal("short-dupe", tagLinks[0].Name, "first occurrence should win for the stored name")

		var updated UpdateLinkShortResponse
		opUpdate := gqlclient.NewOperation(
			`mutation UpdateLinkShort($id: Int!, $title: String!, $url: String!,
						$short: String, $domain: Int, $tags: String) {
					updateLinkShort(input: {
						id: $id,
						title: $title,
						url: $url,
						shortCode: $short,
						domainId: $domain,
						tags: $tags,
					}) {
						id
					}
				}`)
		opUpdate.Var("id", added.LinkShort.ID)
		opUpdate.Var("title", "short with dupe tags")
		opUpdate.Var("url", "https://dupe-short.example.org")
		opUpdate.Var("short", "")
		opUpdate.Var("domain", 1)
		opUpdate.Var("tags", "short-dupe-2, SHORT-DUPE-2")
		err = links.Execute(ctx, opUpdate, &updated)
		c.NoError(err, "ON CONFLICT must not see the same (link_short_id, tag_id) twice")

		tagLinks, err = models.GetTagLinkShorts(dbCtx,
			&database.FilterOptions{Filter: sq.Expr("link_short_id = ?", added.LinkShort.ID)})
		c.NoError(err)
		c.Equal(1, len(tagLinks), "duplicate-slug tags should collapse to a single tag_link_shorts row")
		c.Equal("short-dupe-2", tagLinks[0].Name, "first occurrence should win for the stored name")
	})

	t.Run("get base url by hash", func(t *testing.T) {
		_, err := sq.Update("base_urls").
			Set("public_ready", true).
diff --git a/helpers.go b/helpers.go
index 44c4c91..d191d43 100644
--- a/helpers.go
+++ b/helpers.go
@@ -26,11 +26,11 @@ import (
	"unicode/utf8"

	"git.sr.ht/~emersion/gqlclient"
	"github.com/microcosm-cc/bluemonday"
	"github.com/99designs/gqlgen/graphql"
	sq "github.com/Masterminds/squirrel"
	"github.com/labstack/echo/v4"
	"github.com/labstack/echo/v4/middleware"
	"github.com/microcosm-cc/bluemonday"
	"github.com/segmentio/ksuid"
	"github.com/shopspring/decimal"
	"golang.org/x/net/html"
@@ -764,6 +764,11 @@ func LangForContext(ctx context.Context) string {

func ProcessTags(ctx context.Context, tags []string) ([]models.ProcessedTag, error) {
	tagInputs := make([]models.ProcessedTag, 0)
	// Dedupe by resolved Tag.ID — different input strings can slugify to the
	// same tag (e.g. "foo" and "FOO"). Without this, downstream batch inserts
	// with ON CONFLICT DO UPDATE hit the same row twice and Postgres errors.
	// First occurrence wins for the stored Name.
	seen := make(map[int]bool)
	for _, tag := range tags {
		originalName := strings.TrimSpace(tag)
		originalName = strings.TrimPrefix(originalName, "#")
@@ -780,6 +785,10 @@ func ProcessTags(ctx context.Context, tags []string) ([]models.ProcessedTag, err
			if err != nil {
				return nil, err
			}
			if seen[Tag.ID] {
				continue
			}
			seen[Tag.ID] = true
			tagInputs = append(tagInputs, models.ProcessedTag{
				ID:   Tag.ID,
				Name: originalName,
diff --git a/models/tag_link_shorts.go b/models/tag_link_shorts.go
index b968bde..3ed276e 100644
--- a/models/tag_link_shorts.go
+++ b/models/tag_link_shorts.go
@@ -64,16 +64,21 @@ func CreateBatchTagLinkShorts(ctx context.Context, linkShortID int, tags []Proce
	if len(tags) == 0 {
		return nil
	}
	// See ProcessTags function in helpers.go for dedupe reasoning.
	seen := make(map[int]bool, len(tags))
	err := database.WithTx(ctx, nil, func(tx *sql.Tx) error {
		var err error
		q := sq.
			Insert("tag_link_shorts").
			Columns("link_short_id", "tag_id", "name")

		for _, tag := range tags {
			if seen[tag.ID] {
				continue
			}
			seen[tag.ID] = true
			q = q.Values(linkShortID, tag.ID, tag.Name)
		}
		_, err = q.
		_, err := q.
			Suffix("ON CONFLICT (link_short_id, tag_id) DO UPDATE SET name = EXCLUDED.name").
			PlaceholderFormat(database.GetPlaceholderFormat()).
			RunWith(tx).
diff --git a/models/tag_links.go b/models/tag_links.go
index 7f17e65..adfe043 100644
--- a/models/tag_links.go
+++ b/models/tag_links.go
@@ -64,16 +64,21 @@ func CreateBatchTagLinks(ctx context.Context, linkID int, tags []ProcessedTag) e
	if len(tags) == 0 {
		return nil
	}
	// See ProcessTags function in helpers.go for dedupe reasoning.
	seen := make(map[int]bool, len(tags))
	err := database.WithTx(ctx, nil, func(tx *sql.Tx) error {
		var err error
		q := sq.
			Insert("tag_links").
			Columns("org_link_id", "tag_id", "name")

		for _, tag := range tags {
			if seen[tag.ID] {
				continue
			}
			seen[tag.ID] = true
			q = q.Values(linkID, tag.ID, tag.Name)
		}
		_, err = q.
		_, err := q.
			Suffix("ON CONFLICT (org_link_id, tag_id) DO UPDATE SET name = EXCLUDED.name").
			PlaceholderFormat(database.GetPlaceholderFormat()).
			RunWith(tx).
-- 
2.52.0
Details
Message ID
<DHYUJCE14JOV.EOEPUC1GQ4KX@netlandish.com>
In-Reply-To
<20260421124112.32167-1-peter@netlandish.com> (view parent)
Sender timestamp
1776754431
DKIM signature
missing
Download raw message
Applied.

To git@git.code.netlandish.com:~netlandish/links
   a44df5c..41ae80e  master -> master
Reply to thread Export thread (mbox)