Peter Sanchez: 1 bug: when same tag is given twice postgresql errors due to `ON CONFLICT` collision. This dedupes tag processing. 4 files changed, 211 insertions(+), 5 deletions(-)
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.code.netlandish.com/~netlandish/links-dev/patches/228/mbox | git am -3Learn more about email & git
--- api/api_test.go | 187 ++++++++++++++++++++++++++++++++++++++ helpers.go | 11 ++- models/tag_link_shorts.go | 9 +- models/tag_links.go | 9 +- 4 files changed, 211 insertions(+), 5 deletions(-) diff --git a/api/api_test.go b/api/api_test.go index edf5ab6..ec779dd 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -3579,6 +3579,193 @@ func TestAPI(t *testing.T) { c.Equal(0, len(tagLinksAAfter), "Link should have 0 tags after update with empty tags") }) + t.Run("addLink dedupes tags that resolve to the same slug", func(t *testing.T) { + c := require.New(t) + type AddLinkResponse struct { + Link models.OrgLink `json:"addLink"` + } + + addLinkQuery := `mutation AddLink($title: String!, $url: String!, $description: String, + $visibility: LinkVisibility!, $unread: Boolean!, $starred: Boolean!, + $archive: Boolean! $slug: String!, $tags: String) { + addLink(input: { + title: $title, + url: $url, + description: $description, + visibility: $visibility, + unread: $unread, + starred: $starred, + archive: $archive, + orgSlug: $slug, + tags: $tags}) { + id + hash + } + }` + + var link AddLinkResponse + op := gqlclient.NewOperation(addLinkQuery) + op.Var("title", "Link with duplicate slug tags on create") + op.Var("url", "https://example.com/dupe-tags-add") + op.Var("description", "") + op.Var("visibility", models.OrgLinkVisibilityPrivate) + op.Var("tags", "dupe-add, Dupe-Add, DUPE-ADD") + op.Var("slug", "personal-org") + op.Var("unread", false) + op.Var("starred", false) + op.Var("archive", false) + err := links.Execute(ctx, op, &link) + c.NoError(err) + c.NotZero(link.Link.ID) + + tagLinks, err := models.GetTagLinks(dbCtx, + &database.FilterOptions{Filter: sq.Expr("org_link_id = ?", link.Link.ID)}) + c.NoError(err) + c.Equal(1, len(tagLinks), "duplicate-slug tags should collapse to a single tag_links row") + c.Equal("dupe-add", tagLinks[0].Name, "first occurrence should win for the stored name") + }) + + t.Run("updateLink dedupes tags that resolve to the same slug", func(t *testing.T) { + c := require.New(t) + type AddLinkResponse struct { + Link models.OrgLink `json:"addLink"` + } + type UpdateLinkResponse struct { + Link models.OrgLink `json:"updateLink"` + } + + addLinkQuery := `mutation AddLink($title: String!, $url: String!, $description: String, + $visibility: LinkVisibility!, $unread: Boolean!, $starred: Boolean!, + $archive: Boolean! $slug: String!, $tags: String) { + addLink(input: { + title: $title, + url: $url, + description: $description, + visibility: $visibility, + unread: $unread, + starred: $starred, + archive: $archive, + orgSlug: $slug, + tags: $tags}) { + id + hash + } + }` + + var link AddLinkResponse + opAdd := gqlclient.NewOperation(addLinkQuery) + opAdd.Var("title", "Link with duplicate slug tags on update") + opAdd.Var("url", "https://example.com/dupe-tags-update") + opAdd.Var("description", "") + opAdd.Var("visibility", models.OrgLinkVisibilityPrivate) + opAdd.Var("tags", "original-tag") + opAdd.Var("slug", "personal-org") + opAdd.Var("unread", false) + opAdd.Var("starred", false) + opAdd.Var("archive", false) + err := links.Execute(ctx, opAdd, &link) + c.NoError(err) + c.NotZero(link.Link.ID) + + updateLinkQuery := `mutation UpdateLink($hash: String!, $title: String!, + $url: String!, $visibility: LinkVisibility!, + $tags: String) { + updateLink(input: { + title: $title, + hash: $hash, + url: $url, + visibility: $visibility, + tags: $tags, + }) {id, hash} + }` + + var updated UpdateLinkResponse + opUpdate := gqlclient.NewOperation(updateLinkQuery) + opUpdate.Var("hash", link.Link.Hash) + opUpdate.Var("title", "Link with duplicate slug tags on update") + opUpdate.Var("url", "https://example.com/dupe-tags-update") + opUpdate.Var("visibility", models.OrgLinkVisibilityPrivate) + opUpdate.Var("tags", "dupe-upd, Dupe-Upd, DUPE-UPD") + err = links.Execute(ctx, opUpdate, &updated) + c.NoError(err, "ON CONFLICT must not see the same (org_link_id, tag_id) twice") + + tagLinks, err := models.GetTagLinks(dbCtx, + &database.FilterOptions{Filter: sq.Expr("org_link_id = ?", link.Link.ID)}) + c.NoError(err) + c.Equal(1, len(tagLinks), "duplicate-slug tags should collapse to a single tag_links row") + c.Equal("dupe-upd", tagLinks[0].Name, "first occurrence should win for the stored name") + }) + + t.Run("linkShort dedupes tags that resolve to the same slug", func(t *testing.T) { + c := require.New(t) + type AddLinkShortResponse struct { + LinkShort models.LinkShort `json:"addLinkShort"` + } + type UpdateLinkShortResponse struct { + LinkShort models.LinkShort `json:"updateLinkShort"` + } + + var added AddLinkShortResponse + opAdd := gqlclient.NewOperation( + `mutation AddLinkShort($title: String!, $url: String!, $slug: String!, + $domain: Int!, $short: String, $tags: String) { + addLinkShort(input: { + title: $title, + url: $url, + orgSlug: $slug, + domainId: $domain, + shortCode: $short, + tags: $tags}) { + id + } + }`) + opAdd.Var("title", "short with dupe tags") + opAdd.Var("url", "https://dupe-short.example.org") + opAdd.Var("slug", "personal-org") + opAdd.Var("domain", 1) + opAdd.Var("short", "") + opAdd.Var("tags", "short-dupe, Short-Dupe, SHORT-DUPE") + err := links.Execute(ctx, opAdd, &added) + c.NoError(err) + c.NotZero(added.LinkShort.ID) + + tagLinks, err := models.GetTagLinkShorts(dbCtx, + &database.FilterOptions{Filter: sq.Expr("link_short_id = ?", added.LinkShort.ID)}) + c.NoError(err) + c.Equal(1, len(tagLinks), "duplicate-slug tags should collapse to a single tag_link_shorts row") + c.Equal("short-dupe", tagLinks[0].Name, "first occurrence should win for the stored name") + + var updated UpdateLinkShortResponse + opUpdate := gqlclient.NewOperation( + `mutation UpdateLinkShort($id: Int!, $title: String!, $url: String!, + $short: String, $domain: Int, $tags: String) { + updateLinkShort(input: { + id: $id, + title: $title, + url: $url, + shortCode: $short, + domainId: $domain, + tags: $tags, + }) { + id + } + }`) + opUpdate.Var("id", added.LinkShort.ID) + opUpdate.Var("title", "short with dupe tags") + opUpdate.Var("url", "https://dupe-short.example.org") + opUpdate.Var("short", "") + opUpdate.Var("domain", 1) + opUpdate.Var("tags", "short-dupe-2, SHORT-DUPE-2") + err = links.Execute(ctx, opUpdate, &updated) + c.NoError(err, "ON CONFLICT must not see the same (link_short_id, tag_id) twice") + + tagLinks, err = models.GetTagLinkShorts(dbCtx, + &database.FilterOptions{Filter: sq.Expr("link_short_id = ?", added.LinkShort.ID)}) + c.NoError(err) + c.Equal(1, len(tagLinks), "duplicate-slug tags should collapse to a single tag_link_shorts row") + c.Equal("short-dupe-2", tagLinks[0].Name, "first occurrence should win for the stored name") + }) + t.Run("get base url by hash", func(t *testing.T) { _, err := sq.Update("base_urls"). Set("public_ready", true). diff --git a/helpers.go b/helpers.go index 44c4c91..d191d43 100644 --- a/helpers.go +++ b/helpers.go @@ -26,11 +26,11 @@ import ( "unicode/utf8" "git.sr.ht/~emersion/gqlclient" - "github.com/microcosm-cc/bluemonday" "github.com/99designs/gqlgen/graphql" sq "github.com/Masterminds/squirrel" "github.com/labstack/echo/v4" "github.com/labstack/echo/v4/middleware" + "github.com/microcosm-cc/bluemonday" "github.com/segmentio/ksuid" "github.com/shopspring/decimal" "golang.org/x/net/html" @@ -764,6 +764,11 @@ func LangForContext(ctx context.Context) string { func ProcessTags(ctx context.Context, tags []string) ([]models.ProcessedTag, error) { tagInputs := make([]models.ProcessedTag, 0) + // Dedupe by resolved Tag.ID — different input strings can slugify to the + // same tag (e.g. "foo" and "FOO"). Without this, downstream batch inserts + // with ON CONFLICT DO UPDATE hit the same row twice and Postgres errors. + // First occurrence wins for the stored Name. + seen := make(map[int]bool) for _, tag := range tags { originalName := strings.TrimSpace(tag) originalName = strings.TrimPrefix(originalName, "#") @@ -780,6 +785,10 @@ func ProcessTags(ctx context.Context, tags []string) ([]models.ProcessedTag, err if err != nil { return nil, err } + if seen[Tag.ID] { + continue + } + seen[Tag.ID] = true tagInputs = append(tagInputs, models.ProcessedTag{ ID: Tag.ID, Name: originalName, diff --git a/models/tag_link_shorts.go b/models/tag_link_shorts.go index b968bde..3ed276e 100644 --- a/models/tag_link_shorts.go +++ b/models/tag_link_shorts.go @@ -64,16 +64,21 @@ func CreateBatchTagLinkShorts(ctx context.Context, linkShortID int, tags []Proce if len(tags) == 0 { return nil } + // See ProcessTags function in helpers.go for dedupe reasoning. + seen := make(map[int]bool, len(tags)) err := database.WithTx(ctx, nil, func(tx *sql.Tx) error { - var err error q := sq. Insert("tag_link_shorts"). Columns("link_short_id", "tag_id", "name") for _, tag := range tags { + if seen[tag.ID] { + continue + } + seen[tag.ID] = true q = q.Values(linkShortID, tag.ID, tag.Name) } - _, err = q. + _, err := q. Suffix("ON CONFLICT (link_short_id, tag_id) DO UPDATE SET name = EXCLUDED.name"). PlaceholderFormat(database.GetPlaceholderFormat()). RunWith(tx). diff --git a/models/tag_links.go b/models/tag_links.go index 7f17e65..adfe043 100644 --- a/models/tag_links.go +++ b/models/tag_links.go @@ -64,16 +64,21 @@ func CreateBatchTagLinks(ctx context.Context, linkID int, tags []ProcessedTag) e if len(tags) == 0 { return nil } + // See ProcessTags function in helpers.go for dedupe reasoning. + seen := make(map[int]bool, len(tags)) err := database.WithTx(ctx, nil, func(tx *sql.Tx) error { - var err error q := sq. Insert("tag_links"). Columns("org_link_id", "tag_id", "name") for _, tag := range tags { + if seen[tag.ID] { + continue + } + seen[tag.ID] = true q = q.Values(linkID, tag.ID, tag.Name) } - _, err = q. + _, err := q. Suffix("ON CONFLICT (org_link_id, tag_id) DO UPDATE SET name = EXCLUDED.name"). PlaceholderFormat(database.GetPlaceholderFormat()). RunWith(tx). -- 2.52.0
Applied. To git@git.code.netlandish.com:~netlandish/links a44df5c..41ae80e master -> master