website/pkg/models/demographic/queries.go
Noah Petherbridge 2f31d678d0 Usage Statistics and Website Demographics
Adds two new features to collect and show useful analytics.

Usage Statistics:
* Begin tracking daily active users who log in and interact with major features
  of the website each day, such as the chat room, forum and gallery.

Demographics page:
* For marketing, the home page now shows live statistics about the breakdown of
  content (explicit vs. non-explicit) on the site, and the /insights page gives
  a lot more data in detail.
* Show the percent split in photo gallery content and how many users opt-in or
  share explicit content on the site.
* Show high-level demographics of the members (by age range, gender, orientation)

Misc cleanup:
* Rearrange model list in data export to match the auto-create statements.
* In data exports, include the forum_memberships, push_notifications and
  usage_statistics tables.
2024-09-11 19:28:52 -07:00

285 lines
7.0 KiB
Go

package demographic
import (
"errors"
"sync"
"time"
"code.nonshy.com/nonshy/website/pkg/config"
"code.nonshy.com/nonshy/website/pkg/log"
"code.nonshy.com/nonshy/website/pkg/models"
)
// Cached statistics (in case the queries are heavy to hit too often).
var (
cachedDemographic Demographic
cacheMu sync.Mutex
)
// Get the current cached demographics result.
func Get() (Demographic, error) {
// Do we have the results cached?
var result = cachedDemographic
if !result.Computed || time.Since(result.LastUpdated) > config.DemographicsCacheTTL {
cacheMu.Lock()
defer cacheMu.Unlock()
// If we have a race of threads: e.g. one request is pulling the stats and the second is locked.
// Check if we have an updated result from the first thread.
if time.Since(cachedDemographic.LastUpdated) < config.DemographicsCacheTTL {
return cachedDemographic, nil
}
// Get the latest.
res, err := Generate()
if err != nil {
return result, err
}
cachedDemographic = res
}
return cachedDemographic, nil
}
// Refresh the demographics cache, pulling fresh results from the database every time.
func Refresh() (Demographic, error) {
cacheMu.Lock()
cachedDemographic = Demographic{}
cacheMu.Unlock()
return Get()
}
// Generate the demographics result.
func Generate() (Demographic, error) {
if !config.Current.Database.IsPostgres {
return cachedDemographic, errors.New("this feature requires a PostgreSQL database")
}
result := Demographic{
Computed: true,
LastUpdated: time.Now(),
Photo: PhotoStatistics(),
People: PeopleStatistics(),
}
return result, nil
}
// PeopleStatistics pulls various metrics about users of the website.
func PeopleStatistics() People {
var result = People{
ByAgeRange: map[string]int64{},
ByGender: map[string]int64{},
ByOrientation: map[string]int64{},
}
type record struct {
MetricType string
MetricValue string
MetricCount int64
}
var records []record
res := models.DB.Raw(`
-- Users who opt in/out of explicit content
WITH subquery_explicit AS (
SELECT
SUM(CASE WHEN explicit IS TRUE THEN 1 ELSE 0 END) AS explicit_count,
SUM(CASE WHEN explicit IS NOT TRUE THEN 1 ELSE 0 END) AS non_explicit_count
FROM users
WHERE users.status = 'active'
AND users.certified IS TRUE
),
-- Users who share at least one explicit photo on public
subquery_explicit_photo AS (
SELECT
COUNT(*) AS user_count
FROM users
WHERE users.status = 'active'
AND users.certified IS TRUE
AND EXISTS (
SELECT 1
FROM photos
WHERE photos.user_id = users.id
AND photos.explicit IS TRUE
AND photos.visibility = 'public'
)
),
-- User counts by age
subquery_ages AS (
SELECT
CASE
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 0 AND 25 THEN '18-25'
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 26 and 35 THEN '26-35'
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 36 and 45 THEN '36-45'
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 46 and 55 THEN '46-55'
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 56 and 65 THEN '56-65'
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 66 and 75 THEN '66-75'
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 76 and 85 THEN '76-85'
ELSE '86+'
END AS age_range,
COUNT(*) AS user_count
FROM
users
GROUP BY
CASE
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 0 AND 25 THEN '18-25'
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 26 and 35 THEN '26-35'
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 36 and 45 THEN '36-45'
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 46 and 55 THEN '46-55'
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 56 and 65 THEN '56-65'
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 66 and 75 THEN '66-75'
WHEN DATE_PART('year', AGE(birthdate)) BETWEEN 76 and 85 THEN '76-85'
ELSE '86+'
END
),
-- User counts by gender
subquery_gender AS (
SELECT
profile_fields.value AS gender,
COUNT(*) AS user_count
FROM users
JOIN profile_fields ON profile_fields.user_id = users.id
WHERE users.status = 'active'
AND users.certified IS TRUE
AND profile_fields.name = 'gender'
GROUP BY profile_fields.value
),
-- User counts by orientation
subquery_orientation AS (
SELECT
profile_fields.value AS orientation,
COUNT(*) AS user_count
FROM users
JOIN profile_fields ON profile_fields.user_id = users.id
WHERE users.status = 'active'
AND users.certified IS TRUE
AND profile_fields.name = 'orientation'
GROUP BY profile_fields.value
)
SELECT
'ExplicitCount' AS metric_type,
'explicit' AS metric_value,
explicit_count AS metric_count
FROM subquery_explicit
UNION ALL
SELECT
'ExplicitPhotoCount' AS metric_type,
'count' AS metric_value,
user_count AS metric_count
FROM subquery_explicit_photo
UNION ALL
SELECT
'ExplicitCount' AS metric_type,
'non_explicit' AS metric_value,
non_explicit_count AS metric_count
FROM subquery_explicit
UNION ALL
SELECT
'AgeCounts' AS metric_type,
age_range AS metric_value,
user_count AS metric_count
FROM subquery_ages
UNION ALL
SELECT
'GenderCount' AS metric_type,
gender AS metric_value,
user_count AS metric_count
FROM subquery_gender
UNION ALL
SELECT
'OrientationCount' AS metric_type,
orientation AS metric_value,
user_count AS metric_count
FROM subquery_orientation
`).Scan(&records)
if res.Error != nil {
log.Error("PeopleStatistics: %s", res.Error)
return result
}
// Ingest the records.
for _, row := range records {
switch row.MetricType {
case "ExplicitCount":
result.Total += row.MetricCount
if row.MetricValue == "explicit" {
result.ExplicitOptIn = row.MetricCount
}
case "ExplicitPhotoCount":
result.ExplicitPhoto = row.MetricCount
case "AgeCounts":
if _, ok := result.ByAgeRange[row.MetricValue]; !ok {
result.ByAgeRange[row.MetricValue] = 0
}
result.ByAgeRange[row.MetricValue] += row.MetricCount
case "GenderCount":
if _, ok := result.ByGender[row.MetricValue]; !ok {
result.ByGender[row.MetricValue] = 0
}
result.ByGender[row.MetricValue] += row.MetricCount
case "OrientationCount":
if _, ok := result.ByOrientation[row.MetricValue]; !ok {
result.ByOrientation[row.MetricValue] = 0
}
result.ByOrientation[row.MetricValue] += row.MetricCount
}
}
return result
}
// PhotoStatistics gets info about photo usage on the website.
//
// Counts of Explicit vs. Non-Explicit photos.
func PhotoStatistics() Photo {
var result Photo
type record struct {
Explicit bool
C int64
}
var records []record
res := models.DB.Raw(`
SELECT
photos.explicit,
count(photos.id) AS c
FROM
photos
WHERE
photos.visibility = 'public'
GROUP BY photos.explicit
ORDER BY c DESC
`).Scan(&records)
if res.Error != nil {
log.Error("PhotoStatistics: %s", res.Error)
return result
}
for _, row := range records {
result.Total += row.C
if row.Explicit {
result.Explicit += row.C
} else {
result.NonExplicit += row.C
}
}
return result
}