Wikipedia:Database reports/Longest active user article editing streaks/Configuration
This report is updated every 7 days.
Source code
/*
Copyright 2023 Kunal Mehta <legoktm@debian.org>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
use anyhow::Result;
use dbreps2::{str_vec, Frequency, Report};
use mysql_async::prelude::*;
use mysql_async::Conn;
use time::format_description::FormatItem;
use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime};
/// Renders a date as `YYYYMMDD000000` (the time portion is a literal run of
/// zeros). Used for the bounds compared against `rev_timestamp`.
const TIMESTAMP_DB: &[FormatItem] =
format_description!("[year][month][day]000000");
/// Renders a date as `YYYYMMDD%`, the pattern handed to SQL `LIKE` to match
/// any `rev_timestamp` that starts with that day.
const TIMESTAMP_LIKE: &[FormatItem] =
format_description!("[year][month][day]%");
/// Renders a date for display in the report table, e.g. `January 15, 2023`.
const TIMESTAMP_HUMAN: &[FormatItem] =
format_description!("[month repr:long] [day padding:none], [year]");
/// One line of the report: a user together with their current editing streak.
pub struct Row {
// Name of the user (the `actor_name` column of the candidate query).
user_name: String,
// Number of consecutive days in the streak; starts at the count returned by
// the candidate query and grows by one for every earlier day found.
days: u64,
// First day of the streak; starts at the beginning of the 365-day window
// and is moved back one day at a time while earlier edits are found.
start: Date,
}
/// Report type for "Longest active user article editing streaks"; carries no
/// state of its own, all behaviour lives in its `Report<Row>` implementation.
pub struct UserArticleStreaks {}
/// Report of users who edited at least one mainspace page on every one of the
/// last 365 days, with each streak then extended backwards to its first day.
impl Report<Row> for UserArticleStreaks {
    /// Title of the report.
    fn title(&self) -> &'static str {
        "Longest active user article editing streaks"
    }

    /// The report is regenerated weekly.
    fn frequency(&self) -> Frequency {
        Frequency::Weekly
    }

    /// Candidate query.
    ///
    /// Takes two positional parameters, in this order: the exclusive upper
    /// bound and the inclusive lower bound for `rev_timestamp`. Returns
    /// `(actor_name, days)` for every actor with namespace-0 revisions on more
    /// than 364 distinct calendar days inside that window.
    fn query(&self) -> &'static str {
        r#"
/* userarticlestreaks.rs SLOW_OK */
SELECT
actor_name,
COUNT(DISTINCT substring(rev_timestamp, 1, 8)) as days
FROM
revision_userindex
JOIN page ON rev_page = page_id
JOIN actor_revision ON rev_actor = actor_id
WHERE
rev_timestamp < ?
AND rev_timestamp >= ?
AND page_namespace = 0
GROUP BY
actor_name
HAVING
COUNT(DISTINCT substring(rev_timestamp, 1, 8)) > 364
ORDER BY
days DESC
"#
    }

    /// Finds every user with a full 365-day streak ending yesterday (UTC) and
    /// walks each streak backwards one day at a time until a day without a
    /// mainspace edit is found.
    ///
    /// Returns the rows sorted by streak length, longest first; rows with the
    /// same length are ordered by user name so the output is deterministic.
    ///
    /// # Errors
    ///
    /// Fails if a database query fails or a date cannot be formatted.
    async fn run_query(&self, conn: &mut Conn) -> Result<Vec<Row>> {
        // Probe: did this user make any namespace-0 edit on the given day?
        // Parameters: actor name, then a `YYYYMMDD%` LIKE pattern.
        const EDITED_ON_DAY: &str = r#"
/* userarticlestreaks.rs SLOW_OK */
SELECT
1
FROM
revision_userindex
JOIN page ON rev_page = page_id
JOIN actor_revision ON rev_actor = actor_id
WHERE
actor_name = ?
AND rev_timestamp LIKE ?
AND page_namespace = 0
LIMIT 1"#;

        let today = OffsetDateTime::now_utc().date();
        let one_year_ago = today - Duration::days(365);
        // Window is [one_year_ago, today): today itself is excluded because
        // the day is not over yet.
        let window_end = today.format(TIMESTAMP_DB)?;
        let window_start = one_year_ago.format(TIMESTAMP_DB)?;

        let mut rows = conn
            .exec_map(
                self.query(),
                (window_end, window_start),
                |(user_name, days)| Row {
                    user_name,
                    days,
                    start: one_year_ago,
                },
            )
            .await?;

        for row in &mut rows {
            // Step back one day at a time for as long as the user edited on
            // the day immediately before the current start of the streak.
            loop {
                let yesterday = row.start - Duration::days(1);
                let made_edit: Option<usize> = conn
                    .exec_first(
                        EDITED_ON_DAY,
                        (&row.user_name, yesterday.format(TIMESTAMP_LIKE)?),
                    )
                    .await?;
                if made_edit.is_none() {
                    break;
                }
                row.days += 1;
                row.start = yesterday;
            }
        }

        // Longest streak first; the candidate query does not order ties, so
        // break them by user name to keep successive runs comparable.
        rows.sort_by(|a, b| {
            b.days
                .cmp(&a.days)
                .then_with(|| a.user_name.cmp(&b.user_name))
        });
        Ok(rows)
    }

    /// Introductory text for the report.
    fn intro(&self) -> &'static str {
        "Users who have an active streak of editing at least one mainspace article per day (minimum 365 days listed)"
    }

    /// Column headings, in the same order as the cells from `format_row`.
    fn headings(&self) -> Vec<&'static str> {
        vec!["User", "Consecutive days", "Start of streak"]
    }

    /// Renders one row as: wikilink to the user page, streak length in days,
    /// and the human-readable first day of the streak.
    fn format_row(&self, row: &Row) -> Vec<String> {
        str_vec![
            format!("[[User:{}]]", row.user_name),
            row.days,
            row.start
                .format(TIMESTAMP_HUMAN)
                .expect("a Date always provides year, month and day")
        ]
    }

    /// Source text of `userarticlestreaks.rs`, embedded at compile time.
    fn code(&self) -> &'static str {
        include_str!("userarticlestreaks.rs")
    }
}
/// Pins the exact output of each timestamp format for a fixed sample date.
#[test]
fn test_format() {
    use time::macros::date;

    let sample = date!(2023 - 01 - 15);
    // (format under test, expected rendering of `sample`)
    let cases = [
        (TIMESTAMP_DB, "20230115000000"),
        (TIMESTAMP_LIKE, "20230115%"),
        (TIMESTAMP_HUMAN, "January 15, 2023"),
    ];
    for (format, expected) in cases {
        let rendered = sample.format(format).unwrap();
        assert_eq!(expected, &rendered);
    }
}