From 3c0473c633fbff0f6fae203eb3b1039cca1aeaed Mon Sep 17 00:00:00 2001 From: Noah Petherbridge Date: Wed, 14 Feb 2024 21:38:20 -0800 Subject: [PATCH 1/2] WIP: Deduplicate threads on Newest forum tab --- pkg/controller/forum/newest.go | 8 ++- pkg/models/forum_recent.go | 81 ++++++++++++++++++++----- web/templates/account/inner_circle.html | 2 +- web/templates/forum/newest.html | 11 +++- 4 files changed, 84 insertions(+), 18 deletions(-) diff --git a/pkg/controller/forum/newest.go b/pkg/controller/forum/newest.go index 6ff075f..665cf29 100644 --- a/pkg/controller/forum/newest.go +++ b/pkg/controller/forum/newest.go @@ -14,6 +14,11 @@ import ( func Newest() http.HandlerFunc { tmpl := templates.Must("forum/newest.html") return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Query parameters. + var ( + allComments = r.FormValue("all") == "true" + ) + // Get the current user. currentUser, err := session.CurrentUser(r) if err != nil { @@ -29,7 +34,7 @@ func Newest() http.HandlerFunc { } pager.ParsePage(r) - posts, err := models.PaginateRecentPosts(currentUser, config.ForumCategories, pager) + posts, err := models.PaginateRecentPosts(currentUser, config.ForumCategories, allComments, pager) if err != nil { session.FlashError(w, r, "Couldn't paginate forums: %s", err) templates.Redirect(w, "/") @@ -50,6 +55,7 @@ func Newest() http.HandlerFunc { "Pager": pager, "RecentPosts": posts, "PhotoMap": photos, + "AllComments": allComments, } if err := tmpl.Execute(w, r, vars); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) diff --git a/pkg/models/forum_recent.go b/pkg/models/forum_recent.go index cfa4928..195c358 100644 --- a/pkg/models/forum_recent.go +++ b/pkg/models/forum_recent.go @@ -1,9 +1,11 @@ package models import ( + "fmt" "strings" "time" + "code.nonshy.com/nonshy/website/pkg/config" "code.nonshy.com/nonshy/website/pkg/log" ) @@ -20,7 +22,7 @@ type RecentPost struct { } // PaginateRecentPosts returns all of the comments on a forum paginated. -func PaginateRecentPosts(user *User, categories []string, pager *Pagination) ([]*RecentPost, error) { +func PaginateRecentPosts(user *User, categories []string, allComments bool, pager *Pagination) ([]*RecentPost, error) { var ( result = []*RecentPost{} query = (&Comment{}).Preload() @@ -65,24 +67,73 @@ func PaginateRecentPosts(user *User, categories []string, pager *Pagination) ([] CommentID uint64 ThreadID *uint64 ForumID *uint64 + UpdatedAt time.Time } var scan []scanner - query = DB.Table("comments").Select( - `comments.id AS comment_id, - threads.id AS thread_id, - forums.id AS forum_id`, - ).Joins( - "LEFT OUTER JOIN threads ON (table_name = 'threads' AND table_id = threads.id)", - ).Joins( - "LEFT OUTER JOIN forums ON (threads.forum_id = forums.id)", - ).Where( - strings.Join(wheres, " AND "), - placeholders..., - ).Order("comments.updated_at desc") + + // Deduplicate forum threads: if one thread is BLOWING UP with replies, we should only + // mention the thread once and show the newest comment so it doesn't spam the whole page. + if config.Current.Database.IsPostgres && !allComments { + // Deduplicating is supported in Postgres but not SQLite. We also need a custom + // total count query which is 99% the same as the select query except for the + // SELECT and ORDER BY bookends. + subquery := fmt.Sprintf(` + FROM ( + SELECT DISTINCT ON (threads.id) + comments.id AS comment_id, + threads.id AS thread_id, + forums.id AS forum_id, + comments.updated_at AS updated_at + FROM comments + LEFT OUTER JOIN threads ON (table_name='threads' AND table_id=threads.id) + LEFT OUTER JOIN forums ON (threads.forum_id=forums.id) + WHERE %s + ORDER BY threads.id + ) AS subquery + `, strings.Join(wheres, " AND ")) + + query = DB.Raw(fmt.Sprintf(` + SELECT comment_id, thread_id, forum_id, updated_at + %s + ORDER BY subquery.updated_at DESC + OFFSET %d LIMIT %d + `, subquery, pager.GetOffset(), pager.PerPage), placeholders...) + + // Get a count of records. + DB.Raw(fmt.Sprintf("SELECT count(*) %s", subquery), placeholders...).Count(&pager.Total) + + query = query.Find(&scan) + if query.Error != nil { + return nil, query.Error + } + } else { + // SQLite/non-Postgres doesn't support DISTINCT ON, this is the old query which + // shows objectively all comments and a popular thread may dominate the page. + query = DB.Table("comments").Select( + `comments.id AS comment_id, + threads.id AS thread_id, + forums.id AS forum_id, + comments.updated_at AS updated_at`, + ).Joins( + "LEFT OUTER JOIN threads ON (table_name = 'threads' AND table_id = threads.id)", + ).Joins( + "LEFT OUTER JOIN forums ON (threads.forum_id = forums.id)", + ).Where( + strings.Join(wheres, " AND "), + placeholders..., + ).Order("comments.updated_at desc") + query.Model(&Comment{}).Count(&pager.Total) + + // Execute the query. + query = query.Offset(pager.GetOffset()).Limit(pager.PerPage).Find(&scan) + if query.Error != nil { + return nil, query.Error + } + } // Get the total for the pager and scan the page of ID sets. - query.Model(&Comment{}).Count(&pager.Total) - query = query.Offset(pager.GetOffset()).Limit(pager.PerPage).Find(&scan) + // query.Model(&Comment{}).Count(&pager.Total) + // query = query.Offset(pager.GetOffset()).Limit(pager.PerPage).Find(&scan) if query.Error != nil { return nil, query.Error } diff --git a/web/templates/account/inner_circle.html b/web/templates/account/inner_circle.html index 3e909e3..7779b9c 100644 --- a/web/templates/account/inner_circle.html +++ b/web/templates/account/inner_circle.html @@ -99,7 +99,7 @@
  • On the - Forums + Forums you can access exclusive inner circle-only boards.
  • diff --git a/web/templates/forum/newest.html b/web/templates/forum/newest.html index 072fc30..bd46ff4 100644 --- a/web/templates/forum/newest.html +++ b/web/templates/forum/newest.html @@ -39,7 +39,16 @@
    - Found {{FormatNumberCommas .Pager.Total}} posts (page {{.Pager.Page}} of {{.Pager.Pages}}) + Found {{FormatNumberCommas .Pager.Total}} {{if .AllComments}}posts{{else}}threads{{end}} (page {{.Pager.Page}} of {{.Pager.Pages}}) + +
    + {{if not .AllComments}} + + Showing only the latest comment per thread. Show all comments instead? + {{else}} + Showing all forum posts by most recent. Deduplicate by thread? + {{end}} +
    From 62d56d59246e78f632d57598806567b8e9a50e23 Mon Sep 17 00:00:00 2001 From: Noah Petherbridge Date: Thu, 15 Feb 2024 19:53:25 -0800 Subject: [PATCH 2/2] Better "Newest" tab for forums --- pkg/models/comment_photo.go | 7 ++ pkg/models/forum_recent.go | 236 +++++++++++++++++++++++++----------- pkg/models/forum_stats.go | 1 - 3 files changed, 173 insertions(+), 71 deletions(-) diff --git a/pkg/models/comment_photo.go b/pkg/models/comment_photo.go index 7bbbf98..f4c52eb 100644 --- a/pkg/models/comment_photo.go +++ b/pkg/models/comment_photo.go @@ -89,9 +89,16 @@ func MapCommentPhotos(comments []*Comment) (CommentPhotoMap, error) { ) for _, c := range comments { + if c == nil { + continue + } IDs = append(IDs, c.ID) } + if len(IDs) == 0 { + return result, nil + } + res := DB.Model(&CommentPhoto{}).Where("comment_id IN ?", IDs).Find(&ps) if res.Error != nil { return nil, res.Error diff --git a/pkg/models/forum_recent.go b/pkg/models/forum_recent.go index 195c358..e7ec5f4 100644 --- a/pkg/models/forum_recent.go +++ b/pkg/models/forum_recent.go @@ -1,7 +1,7 @@ package models import ( - "fmt" + "sort" "strings" "time" @@ -25,10 +25,16 @@ type RecentPost struct { func PaginateRecentPosts(user *User, categories []string, allComments bool, pager *Pagination) ([]*RecentPost, error) { var ( result = []*RecentPost{} - query = (&Comment{}).Preload() blockedUserIDs = BlockedUserIDs(user) - wheres = []string{"table_name = 'threads'"} + + // Separate the WHERE clauses that involve forums/threads from the ones + // that involve comments. Rationale: if the user is getting a de-duplicated + // thread view, we'll end up running two queries - one to get all threads and + // another to get the latest comments, and the WHERE clauses need to be separate. + wheres = []string{} placeholders = []interface{}{} + comment_wheres = []string{"table_name = 'threads'"} + comment_ph = []interface{}{} ) if len(categories) > 0 { @@ -48,12 +54,12 @@ func PaginateRecentPosts(user *User, categories []string, allComments bool, page // Blocked users? if len(blockedUserIDs) > 0 { - wheres = append(wheres, "comments.user_id NOT IN ?") - placeholders = append(placeholders, blockedUserIDs) + comment_wheres = append(comment_wheres, "comments.user_id NOT IN ?") + comment_ph = append(comment_ph, blockedUserIDs) } // Don't show comments from banned or disabled accounts. - wheres = append(wheres, ` + comment_wheres = append(comment_wheres, ` EXISTS ( SELECT 1 FROM users @@ -63,81 +69,27 @@ func PaginateRecentPosts(user *User, categories []string, allComments bool, page `) // Get the page of recent forum comment IDs of all time. - type scanner struct { - CommentID uint64 - ThreadID *uint64 - ForumID *uint64 - UpdatedAt time.Time - } - var scan []scanner + var scan NewestForumPostsScanner // Deduplicate forum threads: if one thread is BLOWING UP with replies, we should only // mention the thread once and show the newest comment so it doesn't spam the whole page. if config.Current.Database.IsPostgres && !allComments { - // Deduplicating is supported in Postgres but not SQLite. We also need a custom - // total count query which is 99% the same as the select query except for the - // SELECT and ORDER BY bookends. - subquery := fmt.Sprintf(` - FROM ( - SELECT DISTINCT ON (threads.id) - comments.id AS comment_id, - threads.id AS thread_id, - forums.id AS forum_id, - comments.updated_at AS updated_at - FROM comments - LEFT OUTER JOIN threads ON (table_name='threads' AND table_id=threads.id) - LEFT OUTER JOIN forums ON (threads.forum_id=forums.id) - WHERE %s - ORDER BY threads.id - ) AS subquery - `, strings.Join(wheres, " AND ")) - - query = DB.Raw(fmt.Sprintf(` - SELECT comment_id, thread_id, forum_id, updated_at - %s - ORDER BY subquery.updated_at DESC - OFFSET %d LIMIT %d - `, subquery, pager.GetOffset(), pager.PerPage), placeholders...) - - // Get a count of records. - DB.Raw(fmt.Sprintf("SELECT count(*) %s", subquery), placeholders...).Count(&pager.Total) - - query = query.Find(&scan) - if query.Error != nil { - return nil, query.Error + // Note: only Postgres supports this function (SELECT DISTINCT ON). + if res, err := ScanLatestForumCommentsPerThread(wheres, comment_wheres, placeholders, comment_ph, pager); err != nil { + return nil, err + } else { + scan = res } } else { // SQLite/non-Postgres doesn't support DISTINCT ON, this is the old query which // shows objectively all comments and a popular thread may dominate the page. - query = DB.Table("comments").Select( - `comments.id AS comment_id, - threads.id AS thread_id, - forums.id AS forum_id, - comments.updated_at AS updated_at`, - ).Joins( - "LEFT OUTER JOIN threads ON (table_name = 'threads' AND table_id = threads.id)", - ).Joins( - "LEFT OUTER JOIN forums ON (threads.forum_id = forums.id)", - ).Where( - strings.Join(wheres, " AND "), - placeholders..., - ).Order("comments.updated_at desc") - query.Model(&Comment{}).Count(&pager.Total) - - // Execute the query. - query = query.Offset(pager.GetOffset()).Limit(pager.PerPage).Find(&scan) - if query.Error != nil { - return nil, query.Error + if res, err := ScanLatestForumCommentsAll(wheres, comment_wheres, placeholders, comment_ph, pager); err != nil { + return nil, err + } else { + scan = res } } - // Get the total for the pager and scan the page of ID sets. - // query.Model(&Comment{}).Count(&pager.Total) - // query = query.Offset(pager.GetOffset()).Limit(pager.PerPage).Find(&scan) - if query.Error != nil { - return nil, query.Error - } - // Ingest the results. var ( commentIDs = []uint64{} // collect distinct IDs @@ -232,6 +184,13 @@ func PaginateRecentPosts(user *User, categories []string, allComments bool, page } } + // Is the new comment unavailable? (e.g. blocked, banned, disabled) + if rc.Comment == nil { + rc.Comment = &Comment{ + Message: "[unavailable]", + } + } + if f, ok := forums[rc.ForumID]; ok { rc.Forum = f } @@ -243,3 +202,140 @@ func PaginateRecentPosts(user *User, categories []string, allComments bool, page return result, nil } + +// NewestForumPosts collects the IDs of the latest forum posts. +type NewestForumPosts struct { + CommentID uint64 + ThreadID *uint64 + ForumID *uint64 + UpdatedAt time.Time +} + +type NewestForumPostsScanner []NewestForumPosts + +// ScanLatestForumCommentsAll returns a scan of Newest forum posts containing ALL comments, which may +// include runs of 'duplicate' forum threads if a given thread was commented on rapidly. This is the classic +// 'Newest' tab behavior, showing just ALL forum comments by newest. +func ScanLatestForumCommentsAll(wheres, comment_wheres []string, placeholders, comment_ph []interface{}, pager *Pagination) (NewestForumPostsScanner, error) { + var scan NewestForumPostsScanner + + // This one is all one joined query so join the wheres/placeholders. + wheres = append(wheres, comment_wheres...) + placeholders = append(placeholders, comment_ph...) + + // SQLite/non-Postgres doesn't support DISTINCT ON, this is the old query which + // shows objectively all comments and a popular thread may dominate the page. + query := DB.Table("comments").Select( + `comments.id AS comment_id, + threads.id AS thread_id, + forums.id AS forum_id, + comments.updated_at AS updated_at`, + ).Joins( + "LEFT OUTER JOIN threads ON (table_name = 'threads' AND table_id = threads.id)", + ).Joins( + "LEFT OUTER JOIN forums ON (threads.forum_id = forums.id)", + ).Where( + strings.Join(wheres, " AND "), + placeholders..., + ).Order("comments.updated_at desc") + query.Model(&Comment{}).Count(&pager.Total) + + // Execute the query. + query = query.Offset(pager.GetOffset()).Limit(pager.PerPage).Find(&scan) + return scan, query.Error +} + +// ScanLatestForumCommentsPerThread returns a scan of Newest forum posts, deduplicated by thread. +// Each thread ID will only appear once in the result, paired with the newest comment in that +// thread. +func ScanLatestForumCommentsPerThread(wheres, comment_wheres []string, placeholders, comment_ph []interface{}, pager *Pagination) (NewestForumPostsScanner, error) { + var ( + result NewestForumPostsScanner + threadIDs = []uint64{} + + // Query for ALL thread IDs (in forums the user can see). + query = DB.Table( + "threads", + ).Select(` + DISTINCT ON (threads.id) + threads.forum_id, + threads.id AS thread_id, + threads.updated_at AS updated_at + `).Joins( + "JOIN forums ON (threads.forum_id = forums.id)", + ).Where( + strings.Join(wheres, " AND "), + placeholders..., + ).Order( + "threads.id", + ) + ) + + query = query.Find(&result) + if query.Error != nil { + return result, query.Error + } + pager.Total = int64(len(result)) + + // Reorder the result by timestamp. + sort.Slice(result, func(i, j int) bool { + return result[i].UpdatedAt.After(result[j].UpdatedAt) + }) + + // Subslice the result per the user's pagination setting. + var ( + start = pager.GetOffset() + stop = start + pager.PerPage + ) + if start > len(result) { + return NewestForumPostsScanner{}, nil + } else if stop > len(result) { + stop = len(result) + } + result = result[start:stop] + + // Map the thread IDs to their result row. + var threadMap = map[uint64]int{} + for i, row := range result { + threadIDs = append(threadIDs, *row.ThreadID) + threadMap[*row.ThreadID] = i + } + + // With these thread IDs, select the newest comments. + type scanner struct { + ThreadID uint64 + CommentID uint64 + } + var scan []scanner + err := DB.Table( + "comments", + ).Select( + "table_id AS thread_id, id AS comment_id", + ).Where( + `table_name='threads' AND table_id IN ? + AND updated_at = (SELECT MAX(updated_at) + FROM comments c2 + WHERE c2.table_name=comments.table_name + AND c2.table_id=comments.table_id + )`, + threadIDs, + ).Where( + strings.Join(comment_wheres, " AND "), + comment_ph..., + ).Order( + "updated_at desc", + ).Scan(&scan) + if err.Error != nil { + log.Error("Getting most recent post IDs: %s", err.Error) + return result, err.Error + } + + // Populate the comment IDs back in. + for _, row := range scan { + if idx, ok := threadMap[row.ThreadID]; ok { + result[idx].CommentID = row.CommentID + } + } + + return result, query.Error +} diff --git a/pkg/models/forum_stats.go b/pkg/models/forum_stats.go index 2bbb8ba..59d2d6d 100644 --- a/pkg/models/forum_stats.go +++ b/pkg/models/forum_stats.go @@ -230,7 +230,6 @@ func (ts ForumStatsMap) generateRecentPosts(IDs []uint64) { "comments", ).Select( "table_id AS thread_id, id AS comment_id", - // "forum_id, id AS thread_id, updated_at", ).Where( `table_name='threads' AND table_id IN ? AND updated_at = (SELECT MAX(updated_at)