Deduplicate threads on Newest forum tab #38
|
@ -89,9 +89,16 @@ func MapCommentPhotos(comments []*Comment) (CommentPhotoMap, error) {
|
|||
)
|
||||
|
||||
for _, c := range comments {
|
||||
if c == nil {
|
||||
continue
|
||||
}
|
||||
IDs = append(IDs, c.ID)
|
||||
}
|
||||
|
||||
if len(IDs) == 0 {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
res := DB.Model(&CommentPhoto{}).Where("comment_id IN ?", IDs).Find(&ps)
|
||||
if res.Error != nil {
|
||||
return nil, res.Error
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
package models
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
|
@ -25,10 +25,16 @@ type RecentPost struct {
|
|||
func PaginateRecentPosts(user *User, categories []string, allComments bool, pager *Pagination) ([]*RecentPost, error) {
|
||||
var (
|
||||
result = []*RecentPost{}
|
||||
query = (&Comment{}).Preload()
|
||||
blockedUserIDs = BlockedUserIDs(user)
|
||||
wheres = []string{"table_name = 'threads'"}
|
||||
|
||||
// Separate the WHERE clauses that involve forums/threads from the ones
|
||||
// that involve comments. Rationale: if the user is getting a de-duplicated
|
||||
// thread view, we'll end up running two queries - one to get all threads and
|
||||
// another to get the latest comments, and the WHERE clauses need to be separate.
|
||||
wheres = []string{}
|
||||
placeholders = []interface{}{}
|
||||
comment_wheres = []string{"table_name = 'threads'"}
|
||||
comment_ph = []interface{}{}
|
||||
)
|
||||
|
||||
if len(categories) > 0 {
|
||||
|
@ -48,12 +54,12 @@ func PaginateRecentPosts(user *User, categories []string, allComments bool, page
|
|||
|
||||
// Blocked users?
|
||||
if len(blockedUserIDs) > 0 {
|
||||
wheres = append(wheres, "comments.user_id NOT IN ?")
|
||||
placeholders = append(placeholders, blockedUserIDs)
|
||||
comment_wheres = append(comment_wheres, "comments.user_id NOT IN ?")
|
||||
comment_ph = append(comment_ph, blockedUserIDs)
|
||||
}
|
||||
|
||||
// Don't show comments from banned or disabled accounts.
|
||||
wheres = append(wheres, `
|
||||
comment_wheres = append(comment_wheres, `
|
||||
EXISTS (
|
||||
SELECT 1
|
||||
FROM users
|
||||
|
@ -63,81 +69,27 @@ func PaginateRecentPosts(user *User, categories []string, allComments bool, page
|
|||
`)
|
||||
|
||||
// Get the page of recent forum comment IDs of all time.
|
||||
type scanner struct {
|
||||
CommentID uint64
|
||||
ThreadID *uint64
|
||||
ForumID *uint64
|
||||
UpdatedAt time.Time
|
||||
}
|
||||
var scan []scanner
|
||||
var scan NewestForumPostsScanner
|
||||
|
||||
// Deduplicate forum threads: if one thread is BLOWING UP with replies, we should only
|
||||
// mention the thread once and show the newest comment so it doesn't spam the whole page.
|
||||
if config.Current.Database.IsPostgres && !allComments {
|
||||
// Deduplicating is supported in Postgres but not SQLite. We also need a custom
|
||||
// total count query which is 99% the same as the select query except for the
|
||||
// SELECT and ORDER BY bookends.
|
||||
subquery := fmt.Sprintf(`
|
||||
FROM (
|
||||
SELECT DISTINCT ON (threads.id)
|
||||
comments.id AS comment_id,
|
||||
threads.id AS thread_id,
|
||||
forums.id AS forum_id,
|
||||
comments.updated_at AS updated_at
|
||||
FROM comments
|
||||
LEFT OUTER JOIN threads ON (table_name='threads' AND table_id=threads.id)
|
||||
LEFT OUTER JOIN forums ON (threads.forum_id=forums.id)
|
||||
WHERE %s
|
||||
ORDER BY threads.id
|
||||
) AS subquery
|
||||
`, strings.Join(wheres, " AND "))
|
||||
|
||||
query = DB.Raw(fmt.Sprintf(`
|
||||
SELECT comment_id, thread_id, forum_id, updated_at
|
||||
%s
|
||||
ORDER BY subquery.updated_at DESC
|
||||
OFFSET %d LIMIT %d
|
||||
`, subquery, pager.GetOffset(), pager.PerPage), placeholders...)
|
||||
|
||||
// Get a count of records.
|
||||
DB.Raw(fmt.Sprintf("SELECT count(*) %s", subquery), placeholders...).Count(&pager.Total)
|
||||
|
||||
query = query.Find(&scan)
|
||||
if query.Error != nil {
|
||||
return nil, query.Error
|
||||
// Note: only Postgres supports this function (SELECT DISTINCT ON).
|
||||
if res, err := ScanLatestForumCommentsPerThread(wheres, comment_wheres, placeholders, comment_ph, pager); err != nil {
|
||||
return nil, err
|
||||
} else {
|
||||
scan = res
|
||||
}
|
||||
} else {
|
||||
// SQLite/non-Postgres doesn't support DISTINCT ON, this is the old query which
|
||||
// shows objectively all comments and a popular thread may dominate the page.
|
||||
query = DB.Table("comments").Select(
|
||||
`comments.id AS comment_id,
|
||||
threads.id AS thread_id,
|
||||
forums.id AS forum_id,
|
||||
comments.updated_at AS updated_at`,
|
||||
).Joins(
|
||||
"LEFT OUTER JOIN threads ON (table_name = 'threads' AND table_id = threads.id)",
|
||||
).Joins(
|
||||
"LEFT OUTER JOIN forums ON (threads.forum_id = forums.id)",
|
||||
).Where(
|
||||
strings.Join(wheres, " AND "),
|
||||
placeholders...,
|
||||
).Order("comments.updated_at desc")
|
||||
query.Model(&Comment{}).Count(&pager.Total)
|
||||
|
||||
// Execute the query.
|
||||
query = query.Offset(pager.GetOffset()).Limit(pager.PerPage).Find(&scan)
|
||||
if query.Error != nil {
|
||||
return nil, query.Error
|
||||
if res, err := ScanLatestForumCommentsAll(wheres, comment_wheres, placeholders, comment_ph, pager); err != nil {
|
||||
return nil, err
|
||||
} else {
|
||||
scan = res
|
||||
}
|
||||
}
|
||||
|
||||
// Get the total for the pager and scan the page of ID sets.
|
||||
// query.Model(&Comment{}).Count(&pager.Total)
|
||||
// query = query.Offset(pager.GetOffset()).Limit(pager.PerPage).Find(&scan)
|
||||
if query.Error != nil {
|
||||
return nil, query.Error
|
||||
}
|
||||
|
||||
// Ingest the results.
|
||||
var (
|
||||
commentIDs = []uint64{} // collect distinct IDs
|
||||
|
@ -232,6 +184,13 @@ func PaginateRecentPosts(user *User, categories []string, allComments bool, page
|
|||
}
|
||||
}
|
||||
|
||||
// Is the new comment unavailable? (e.g. blocked, banned, disabled)
|
||||
if rc.Comment == nil {
|
||||
rc.Comment = &Comment{
|
||||
Message: "[unavailable]",
|
||||
}
|
||||
}
|
||||
|
||||
if f, ok := forums[rc.ForumID]; ok {
|
||||
rc.Forum = f
|
||||
}
|
||||
|
@ -243,3 +202,140 @@ func PaginateRecentPosts(user *User, categories []string, allComments bool, page
|
|||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// NewestForumPosts collects the IDs of the latest forum posts.
|
||||
type NewestForumPosts struct {
|
||||
CommentID uint64
|
||||
ThreadID *uint64
|
||||
ForumID *uint64
|
||||
UpdatedAt time.Time
|
||||
}
|
||||
|
||||
type NewestForumPostsScanner []NewestForumPosts
|
||||
|
||||
// ScanLatestForumCommentsAll returns a scan of Newest forum posts containing ALL comments, which may
|
||||
// include runs of 'duplicate' forum threads if a given thread was commented on rapidly. This is the classic
|
||||
// 'Newest' tab behavior, showing just ALL forum comments by newest.
|
||||
func ScanLatestForumCommentsAll(wheres, comment_wheres []string, placeholders, comment_ph []interface{}, pager *Pagination) (NewestForumPostsScanner, error) {
|
||||
var scan NewestForumPostsScanner
|
||||
|
||||
// This one is all one joined query so join the wheres/placeholders.
|
||||
wheres = append(wheres, comment_wheres...)
|
||||
placeholders = append(placeholders, comment_ph...)
|
||||
|
||||
// SQLite/non-Postgres doesn't support DISTINCT ON, this is the old query which
|
||||
// shows objectively all comments and a popular thread may dominate the page.
|
||||
query := DB.Table("comments").Select(
|
||||
`comments.id AS comment_id,
|
||||
threads.id AS thread_id,
|
||||
forums.id AS forum_id,
|
||||
comments.updated_at AS updated_at`,
|
||||
).Joins(
|
||||
"LEFT OUTER JOIN threads ON (table_name = 'threads' AND table_id = threads.id)",
|
||||
).Joins(
|
||||
"LEFT OUTER JOIN forums ON (threads.forum_id = forums.id)",
|
||||
).Where(
|
||||
strings.Join(wheres, " AND "),
|
||||
placeholders...,
|
||||
).Order("comments.updated_at desc")
|
||||
query.Model(&Comment{}).Count(&pager.Total)
|
||||
|
||||
// Execute the query.
|
||||
query = query.Offset(pager.GetOffset()).Limit(pager.PerPage).Find(&scan)
|
||||
return scan, query.Error
|
||||
}
|
||||
|
||||
// ScanLatestForumCommentsPerThread returns a scan of Newest forum posts, deduplicated by thread.
|
||||
// Each thread ID will only appear once in the result, paired with the newest comment in that
|
||||
// thread.
|
||||
func ScanLatestForumCommentsPerThread(wheres, comment_wheres []string, placeholders, comment_ph []interface{}, pager *Pagination) (NewestForumPostsScanner, error) {
|
||||
var (
|
||||
result NewestForumPostsScanner
|
||||
threadIDs = []uint64{}
|
||||
|
||||
// Query for ALL thread IDs (in forums the user can see).
|
||||
query = DB.Table(
|
||||
"threads",
|
||||
).Select(`
|
||||
DISTINCT ON (threads.id)
|
||||
threads.forum_id,
|
||||
threads.id AS thread_id,
|
||||
threads.updated_at AS updated_at
|
||||
`).Joins(
|
||||
"JOIN forums ON (threads.forum_id = forums.id)",
|
||||
).Where(
|
||||
strings.Join(wheres, " AND "),
|
||||
placeholders...,
|
||||
).Order(
|
||||
"threads.id",
|
||||
)
|
||||
)
|
||||
|
||||
query = query.Find(&result)
|
||||
if query.Error != nil {
|
||||
return result, query.Error
|
||||
}
|
||||
pager.Total = int64(len(result))
|
||||
|
||||
// Reorder the result by timestamp.
|
||||
sort.Slice(result, func(i, j int) bool {
|
||||
return result[i].UpdatedAt.After(result[j].UpdatedAt)
|
||||
})
|
||||
|
||||
// Subslice the result per the user's pagination setting.
|
||||
var (
|
||||
start = pager.GetOffset()
|
||||
stop = start + pager.PerPage
|
||||
)
|
||||
if start > len(result) {
|
||||
return NewestForumPostsScanner{}, nil
|
||||
} else if stop > len(result) {
|
||||
stop = len(result)
|
||||
}
|
||||
result = result[start:stop]
|
||||
|
||||
// Map the thread IDs to their result row.
|
||||
var threadMap = map[uint64]int{}
|
||||
for i, row := range result {
|
||||
threadIDs = append(threadIDs, *row.ThreadID)
|
||||
threadMap[*row.ThreadID] = i
|
||||
}
|
||||
|
||||
// With these thread IDs, select the newest comments.
|
||||
type scanner struct {
|
||||
ThreadID uint64
|
||||
CommentID uint64
|
||||
}
|
||||
var scan []scanner
|
||||
err := DB.Table(
|
||||
"comments",
|
||||
).Select(
|
||||
"table_id AS thread_id, id AS comment_id",
|
||||
).Where(
|
||||
`table_name='threads' AND table_id IN ?
|
||||
AND updated_at = (SELECT MAX(updated_at)
|
||||
FROM comments c2
|
||||
WHERE c2.table_name=comments.table_name
|
||||
AND c2.table_id=comments.table_id
|
||||
)`,
|
||||
threadIDs,
|
||||
).Where(
|
||||
strings.Join(comment_wheres, " AND "),
|
||||
comment_ph...,
|
||||
).Order(
|
||||
"updated_at desc",
|
||||
).Scan(&scan)
|
||||
if err.Error != nil {
|
||||
log.Error("Getting most recent post IDs: %s", err.Error)
|
||||
return result, err.Error
|
||||
}
|
||||
|
||||
// Populate the comment IDs back in.
|
||||
for _, row := range scan {
|
||||
if idx, ok := threadMap[row.ThreadID]; ok {
|
||||
result[idx].CommentID = row.CommentID
|
||||
}
|
||||
}
|
||||
|
||||
return result, query.Error
|
||||
}
|
||||
|
|
|
@ -230,7 +230,6 @@ func (ts ForumStatsMap) generateRecentPosts(IDs []uint64) {
|
|||
"comments",
|
||||
).Select(
|
||||
"table_id AS thread_id, id AS comment_id",
|
||||
// "forum_id, id AS thread_id, updated_at",
|
||||
).Where(
|
||||
`table_name='threads' AND table_id IN ?
|
||||
AND updated_at = (SELECT MAX(updated_at)
|
||||
|
|
Loading…
Reference in New Issue
Block a user