Deduplicate threads on Newest forum tab #38

Merged
noah merged 2 commits from deduplicate-forum-newest into main 2024-02-16 03:56:16 +00:00
6 changed files with 201 additions and 33 deletions

View File

@ -14,6 +14,11 @@ import (
func Newest() http.HandlerFunc {
tmpl := templates.Must("forum/newest.html")
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Query parameters.
var (
allComments = r.FormValue("all") == "true"
)
// Get the current user.
currentUser, err := session.CurrentUser(r)
if err != nil {
@ -29,7 +34,7 @@ func Newest() http.HandlerFunc {
}
pager.ParsePage(r)
posts, err := models.PaginateRecentPosts(currentUser, config.ForumCategories, pager)
posts, err := models.PaginateRecentPosts(currentUser, config.ForumCategories, allComments, pager)
if err != nil {
session.FlashError(w, r, "Couldn't paginate forums: %s", err)
templates.Redirect(w, "/")
@ -50,6 +55,7 @@ func Newest() http.HandlerFunc {
"Pager": pager,
"RecentPosts": posts,
"PhotoMap": photos,
"AllComments": allComments,
}
if err := tmpl.Execute(w, r, vars); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)

View File

@ -89,9 +89,16 @@ func MapCommentPhotos(comments []*Comment) (CommentPhotoMap, error) {
)
for _, c := range comments {
if c == nil {
continue
}
IDs = append(IDs, c.ID)
}
if len(IDs) == 0 {
return result, nil
}
res := DB.Model(&CommentPhoto{}).Where("comment_id IN ?", IDs).Find(&ps)
if res.Error != nil {
return nil, res.Error

View File

@ -1,9 +1,11 @@
package models
import (
"sort"
"strings"
"time"
"code.nonshy.com/nonshy/website/pkg/config"
"code.nonshy.com/nonshy/website/pkg/log"
)
@ -20,13 +22,19 @@ type RecentPost struct {
}
// PaginateRecentPosts returns all of the comments on a forum paginated.
func PaginateRecentPosts(user *User, categories []string, pager *Pagination) ([]*RecentPost, error) {
func PaginateRecentPosts(user *User, categories []string, allComments bool, pager *Pagination) ([]*RecentPost, error) {
var (
result = []*RecentPost{}
query = (&Comment{}).Preload()
blockedUserIDs = BlockedUserIDs(user)
wheres = []string{"table_name = 'threads'"}
// Separate the WHERE clauses that involve forums/threads from the ones
// that involve comments. Rationale: if the user is getting a de-duplicated
// thread view, we'll end up running two queries - one to get all threads and
// another to get the latest comments, and the WHERE clauses need to be separate.
wheres = []string{}
placeholders = []interface{}{}
comment_wheres = []string{"table_name = 'threads'"}
comment_ph = []interface{}{}
)
if len(categories) > 0 {
@ -46,12 +54,12 @@ func PaginateRecentPosts(user *User, categories []string, pager *Pagination) ([]
// Blocked users?
if len(blockedUserIDs) > 0 {
wheres = append(wheres, "comments.user_id NOT IN ?")
placeholders = append(placeholders, blockedUserIDs)
comment_wheres = append(comment_wheres, "comments.user_id NOT IN ?")
comment_ph = append(comment_ph, blockedUserIDs)
}
// Don't show comments from banned or disabled accounts.
wheres = append(wheres, `
comment_wheres = append(comment_wheres, `
EXISTS (
SELECT 1
FROM users
@ -61,30 +69,25 @@ func PaginateRecentPosts(user *User, categories []string, pager *Pagination) ([]
`)
// Get the page of recent forum comment IDs of all time.
type scanner struct {
CommentID uint64
ThreadID *uint64
ForumID *uint64
}
var scan []scanner
query = DB.Table("comments").Select(
`comments.id AS comment_id,
threads.id AS thread_id,
forums.id AS forum_id`,
).Joins(
"LEFT OUTER JOIN threads ON (table_name = 'threads' AND table_id = threads.id)",
).Joins(
"LEFT OUTER JOIN forums ON (threads.forum_id = forums.id)",
).Where(
strings.Join(wheres, " AND "),
placeholders...,
).Order("comments.updated_at desc")
var scan NewestForumPostsScanner
// Get the total for the pager and scan the page of ID sets.
query.Model(&Comment{}).Count(&pager.Total)
query = query.Offset(pager.GetOffset()).Limit(pager.PerPage).Find(&scan)
if query.Error != nil {
return nil, query.Error
// Deduplicate forum threads: if one thread is BLOWING UP with replies, we should only
// mention the thread once and show the newest comment so it doesn't spam the whole page.
if config.Current.Database.IsPostgres && !allComments {
// Note: only Postgres supports this function (SELECT DISTINCT ON).
if res, err := ScanLatestForumCommentsPerThread(wheres, comment_wheres, placeholders, comment_ph, pager); err != nil {
return nil, err
} else {
scan = res
}
} else {
// SQLite/non-Postgres doesn't support DISTINCT ON, this is the old query which
// shows objectively all comments and a popular thread may dominate the page.
if res, err := ScanLatestForumCommentsAll(wheres, comment_wheres, placeholders, comment_ph, pager); err != nil {
return nil, err
} else {
scan = res
}
}
// Ingest the results.
@ -181,6 +184,13 @@ func PaginateRecentPosts(user *User, categories []string, pager *Pagination) ([]
}
}
// Is the new comment unavailable? (e.g. blocked, banned, disabled)
if rc.Comment == nil {
rc.Comment = &Comment{
Message: "[unavailable]",
}
}
if f, ok := forums[rc.ForumID]; ok {
rc.Forum = f
}
@ -192,3 +202,140 @@ func PaginateRecentPosts(user *User, categories []string, pager *Pagination) ([]
return result, nil
}
// NewestForumPosts collects the IDs of the latest forum posts.
//
// It is the row shape produced by ScanLatestForumCommentsAll and
// ScanLatestForumCommentsPerThread; the field names correspond to the
// comment_id, thread_id, forum_id and updated_at columns those queries select.
type NewestForumPosts struct {
// CommentID is the ID of the comment to show. The per-thread scan fills this
// in from a second query and leaves it at zero when that query returned no
// row for the thread.
CommentID uint64
// ThreadID is the thread the comment belongs to. Presumably a pointer so a
// NULL from the LEFT OUTER JOIN in ScanLatestForumCommentsAll can be
// represented — TODO confirm.
ThreadID *uint64
// ForumID is the forum that the thread belongs to (nullable for the same
// presumed reason as ThreadID).
ForumID *uint64
// UpdatedAt is comments.updated_at in the "all comments" scan and
// threads.updated_at in the per-thread scan.
UpdatedAt time.Time
}
// NewestForumPostsScanner is a list of NewestForumPosts rows. It is the scan
// destination and the return type of ScanLatestForumCommentsAll and
// ScanLatestForumCommentsPerThread.
type NewestForumPostsScanner []NewestForumPosts
// ScanLatestForumCommentsAll returns a scan of Newest forum posts containing ALL comments, which may
// include runs of 'duplicate' forum threads if a given thread was commented on rapidly. This is the classic
// 'Newest' tab behavior, showing just ALL forum comments by newest.
//
// Parameters:
//   - wheres / placeholders: WHERE clauses (and their bound values) that involve forums/threads.
//   - comment_wheres / comment_ph: WHERE clauses (and their bound values) that involve comments.
//   - pager: supplies the offset and page size; its Total is overwritten with the count of
//     matching comments.
//
// Returns the requested page of rows ordered by comments.updated_at descending, and the
// error (if any) recorded on the query.
func ScanLatestForumCommentsAll(wheres, comment_wheres []string, placeholders, comment_ph []interface{}, pager *Pagination) (NewestForumPostsScanner, error) {
	var scan NewestForumPostsScanner

	// This one is all one joined query so join the wheres/placeholders.
	//
	// Build fresh slices instead of appending onto the arguments: append() may
	// write into spare capacity of the caller's backing arrays, silently
	// clobbering data the caller still owns.
	allWheres := make([]string, 0, len(wheres)+len(comment_wheres))
	allWheres = append(allWheres, wheres...)
	allWheres = append(allWheres, comment_wheres...)

	allPlaceholders := make([]interface{}, 0, len(placeholders)+len(comment_ph))
	allPlaceholders = append(allPlaceholders, placeholders...)
	allPlaceholders = append(allPlaceholders, comment_ph...)

	// SQLite/non-Postgres doesn't support DISTINCT ON, this is the old query which
	// shows objectively all comments and a popular thread may dominate the page.
	query := DB.Table("comments").Select(
		`comments.id AS comment_id,
		threads.id AS thread_id,
		forums.id AS forum_id,
		comments.updated_at AS updated_at`,
	).Joins(
		"LEFT OUTER JOIN threads ON (table_name = 'threads' AND table_id = threads.id)",
	).Joins(
		"LEFT OUTER JOIN forums ON (threads.forum_id = forums.id)",
	).Where(
		strings.Join(allWheres, " AND "),
		allPlaceholders...,
	).Order("comments.updated_at desc")

	// Get the total for the pager before the offset/limit are applied.
	// NOTE(review): the value returned by Count is discarded here, so a failure
	// is only reported if it is also recorded on `query` — confirm.
	query.Model(&Comment{}).Count(&pager.Total)

	// Execute the query for the requested page.
	query = query.Offset(pager.GetOffset()).Limit(pager.PerPage).Find(&scan)
	return scan, query.Error
}
// ScanLatestForumCommentsPerThread returns a scan of Newest forum posts, deduplicated by thread.
// Each thread ID will only appear once in the result, paired with the newest comment in that
// thread.
//
// Parameters:
//   - wheres / placeholders: WHERE clauses (and their bound values) applied to the
//     threads/forums query.
//   - comment_wheres / comment_ph: WHERE clauses (and their bound values) applied to the
//     comments query.
//   - pager: supplies the offset and page size; its Total is overwritten with the number
//     of matching threads.
//
// The threads query uses SELECT DISTINCT ON, so it is only suitable for Postgres.
// All matching threads are loaded, then sorted and paginated in memory.
//
// A row's CommentID stays zero when the thread's newest comment does not pass
// comment_wheres. On error the returned scanner is nil.
func ScanLatestForumCommentsPerThread(wheres, comment_wheres []string, placeholders, comment_ph []interface{}, pager *Pagination) (NewestForumPostsScanner, error) {
	var result NewestForumPostsScanner

	// Query for ALL thread IDs (in forums the user can see).
	query := DB.Table(
		"threads",
	).Select(`
		DISTINCT ON (threads.id)
		threads.forum_id,
		threads.id AS thread_id,
		threads.updated_at AS updated_at
	`).Joins(
		"JOIN forums ON (threads.forum_id = forums.id)",
	).Where(
		strings.Join(wheres, " AND "),
		placeholders...,
	).Order(
		"threads.id",
	).Find(&result)
	if query.Error != nil {
		return nil, query.Error
	}

	pager.Total = int64(len(result))

	// Reorder the result by timestamp, newest first. A stable sort keeps the
	// query's thread ID order for threads with identical timestamps, so the
	// boundaries between pages are well-defined.
	sort.SliceStable(result, func(i, j int) bool {
		return result[i].UpdatedAt.After(result[j].UpdatedAt)
	})

	// Subslice the result per the user's pagination setting.
	start := pager.GetOffset()
	if start >= len(result) {
		// Requested page is past the end: nothing to show.
		return NewestForumPostsScanner{}, nil
	}
	stop := start + pager.PerPage
	if stop > len(result) {
		stop = len(result)
	}
	result = result[start:stop]

	// Map the thread IDs to their result row.
	var (
		threadIDs = make([]uint64, 0, len(result))
		threadMap = make(map[uint64]int, len(result))
	)
	for i, row := range result {
		if row.ThreadID == nil {
			// Defensive: never dereference a missing thread ID.
			continue
		}
		threadIDs = append(threadIDs, *row.ThreadID)
		threadMap[*row.ThreadID] = i
	}

	// No threads on this page: skip the comment lookup entirely.
	if len(threadIDs) == 0 {
		return result, nil
	}

	// With these thread IDs, select the newest comments.
	type scanner struct {
		ThreadID  uint64
		CommentID uint64
	}
	var scan []scanner
	res := DB.Table(
		"comments",
	).Select(
		"table_id AS thread_id, id AS comment_id",
	).Where(
		`table_name='threads' AND table_id IN ?
		AND updated_at = (SELECT MAX(updated_at)
			FROM comments c2
			WHERE c2.table_name=comments.table_name
			AND c2.table_id=comments.table_id
		)`,
		threadIDs,
	).Where(
		strings.Join(comment_wheres, " AND "),
		comment_ph...,
	).Order(
		"updated_at desc",
	).Scan(&scan)
	if res.Error != nil {
		log.Error("Getting most recent post IDs: %s", res.Error)
		return nil, res.Error
	}

	// Populate the comment IDs back in.
	for _, row := range scan {
		if idx, ok := threadMap[row.ThreadID]; ok {
			result[idx].CommentID = row.CommentID
		}
	}

	return result, nil
}

View File

@ -230,7 +230,6 @@ func (ts ForumStatsMap) generateRecentPosts(IDs []uint64) {
"comments",
).Select(
"table_id AS thread_id, id AS comment_id",
// "forum_id, id AS thread_id, updated_at",
).Where(
`table_name='threads' AND table_id IN ?
AND updated_at = (SELECT MAX(updated_at)

View File

@ -99,7 +99,7 @@
</li>
<li>
On the
<a href="/members"><strong><i class="fa fa-comments mr-1"></i> Forums</strong></a>
<a href="/f/circle"><strong><i class="fa fa-comments mr-1"></i> Forums</strong></a>
you can access exclusive inner circle-only boards.
</li>
<li>

View File

@ -39,7 +39,16 @@
</div>
<div class="p-4">
Found {{FormatNumberCommas .Pager.Total}} posts (page {{.Pager.Page}} of {{.Pager.Pages}})
Found {{FormatNumberCommas .Pager.Total}} {{if .AllComments}}posts{{else}}threads{{end}} (page {{.Pager.Page}} of {{.Pager.Pages}})
<div class="mt-2">
{{if not .AllComments}}
<!-- Default view is to deduplicate and show only threads and their newest comment -->
Showing only the latest comment per thread. <a href="?{{QueryPlus "all" "true"}}">Show all comments instead?</a>
{{else}}
Showing <strong>all</strong> forum posts by most recent. <a href="{{.Request.URL.Path}}">Deduplicate by thread?</a>
{{end}}
</div>
</div>
<div class="p-4">