I'm somewhat obsessed with stats, and one of the things I look at is my rate of publishing over time. I've run this blog since 2003 and have gone from blogging 30+ times a month to - well - somewhat less than that. Quality is - of course - far more important than quantity. But as a general stat, I just like to know how much I'm writing.

My static site generator of choice, Hugo, doesn't have anything built in to support getting this. You can get post counts and stuff like that, but I wanted something a bit deeper, and something more focused on the amount of content published over time. So with that in mind, I wrote the following script. If your Hugo site follows the same convention as mine (year/month/day folders), then in theory, it should just work for you.


/*
Step one - read all the md files in content/post
We assume yyyy/mm/dd
*/

var fs = require('fs');
// Root of the Hugo posts tree. Posts are expected at <root>/yyyy/mm/dd/<file>.
const path = './content/post/';
console.log('Scan '+path);

// Running totals, printed as a single object once the scan finishes.
const data = {
	years:{},
	// One slot per calendar month (index 0 = January), pre-filled so months
	// without posts report 0 instead of leaving holes in the array.
	months:new Array(12).fill(0),
	posts:0,
	categories:{},
	tags:{},
	wordCount:0
};

/**
 * Join a directory and an entry name with exactly one '/' between them.
 */
function childPath(dir, name) {
	return dir.endsWith('/') ? dir + name : dir + '/' + name;
}

/**
 * Return the names inside `dir`, keeping only sub-directories
 * (wantDirectories = true) or only regular files (wantDirectories = false).
 * Filtering keeps stray entries such as .DS_Store from being treated as a
 * year/month/day folder. statSync follows symlinks.
 */
function listEntries(dir, wantDirectories) {
	return fs.readdirSync(dir).filter((name) => {
		const stats = fs.statSync(childPath(dir, name));
		return wantDirectories ? stats.isDirectory() : stats.isFile();
	});
}

/**
 * Split a post into its leading JSON front matter and the body after it.
 *
 * The closing brace is found by tracking brace depth and string state, so
 * nested objects and "}" characters inside string values do not end the
 * front matter early.
 *
 * @param {string} content - full text of the post file
 * @returns {{frontMatter: string, body: string}}
 * @throws {Error} if the file does not start with a complete JSON object
 */
function splitFrontMatter(content) {
	// First non-whitespace character (\s also covers a leading BOM).
	const start = content.search(/\S/);
	if(start === -1 || content[start] !== '{') {
		throw new Error('file does not start with JSON front matter');
	}

	let depth = 0;
	let inString = false;
	for(let i = start; i < content.length; i++) {
		const ch = content[i];
		if(inString) {
			if(ch === '\\') i++; // skip the escaped character
			else if(ch === '"') inString = false;
		} else if(ch === '"') {
			inString = true;
		} else if(ch === '{') {
			depth++;
		} else if(ch === '}') {
			depth--;
			if(depth === 0) {
				return {
					frontMatter:content.substring(start, i+1),
					body:content.substring(i+1)
				};
			}
		}
	}
	throw new Error('JSON front matter is not terminated');
}

/**
 * Count whitespace-separated words, ignoring empty tokens produced by
 * repeated spaces, newlines, or leading/trailing whitespace.
 */
function countWords(text) {
	return text.split(/\s+/).filter(Boolean).length;
}

/**
 * Increment counts[key], starting at 1. The own-property check keeps names
 * such as "constructor" from colliding with inherited Object members.
 */
function tally(counts, key) {
	counts[key] = Object.prototype.hasOwnProperty.call(counts, key) ? counts[key] + 1 : 1;
}

for(const year of listEntries(path, true)) {
	const yearDir = childPath(path, year);
	data.years[year] = 0;

	for(const month of listEntries(yearDir, true)) {
		const monthDir = childPath(yearDir, month);
		// Folder names are 1-based months; the array is 0-based.
		const bareMonth = Number(month)-1;
		if(!data.months[bareMonth]) data.months[bareMonth] = 0;

		for(const day of listEntries(monthDir, true)) {
			const dayDir = childPath(monthDir, day);
			const posts = listEntries(dayDir, false);
			data.posts += posts.length;
			data.years[year] += posts.length;
			data.months[bareMonth] += posts.length;

			for(const file of posts) {
				const filePath = childPath(dayDir, file);
				const content = fs.readFileSync(filePath, "utf8");

				let fmData;
				let body;
				try {
					const parts = splitFrontMatter(content);
					fmData = JSON.parse(parts.frontMatter);
					body = parts.body;
				} catch(err) {
					// Name the offending file; a bare JSON error is hard to trace.
					throw new Error('Could not read front matter in '+filePath+': '+err.message);
				}

				// [].concat accepts both an array and a single string value.
				[].concat(fmData.categories || []).forEach((cat) => tally(data.categories, cat));
				[].concat(fmData.tags || []).forEach((tag) => tally(data.tags, tag));

				data.wordCount += countWords(body);
			}
		}
	}
}

// Avoid NaN (0/0) when the scan found no posts.
data.avgWordCount = data.posts > 0 ? data.wordCount / data.posts : 0;

console.log(data);

Basically I just iterate over every year, month, and day, and then open up each file. Hugo stores metadata, or "front matter", at the top of each blog post - in my case as a JSON string. I can read that, parse it, and then figure out what categories and tags are being used. I can then strip that out and get a basic word count too.

The end result is just an object containing the number of posts per year, month, tag, and category. I also store the total word count (why not?) and an average.

As a reminder, you should typically avoid using sync functions in Node, but as this was a simple script just for me, I went for simplicity and I'm ok with that.

Then - for the heck of it - I whipped up a simple stats page to render the data. You can just click that link, but here are the four reports. For the first two, I literally copied and pasted the quick start code from Google's Charting library and modified it slightly.

By Year

By Month

Here's the top portion of my categories and tags stats:

Categories Tags

And finally - a few generic stats:

Generic

The only real interesting part of this page is how I'm handling number formatting - I'm using Intl - a kick ass built-in standard for internationalization for the web. Here's how I make it fail gracefully:


// Number formatter used by the stats page. Uses the locale-aware
// Intl.NumberFormat when the runtime provides it; otherwise falls back to an
// object with the same format(x) method that returns the number as a plain
// string. `typeof Intl` is used for detection so the check cannot throw in
// environments that have no `window` global (workers, Node).
// Kept as `var` so a top-level script still exposes it as a global.
var formatter;
if(typeof Intl !== 'undefined' && typeof Intl.NumberFormat === 'function') {
	formatter = new Intl.NumberFormat();
} else {
	formatter = {
		// String() keeps the return type consistent with Intl's format().
		format:function(x) { return String(x); }
	};
}

I can then just do formatter.format(x) to get my nicely formatted numbers.

Anyway - is this useful to folks using Hugo? Any suggestions?