#!/usr/bin/env node /** * Import Google Analytics pageview data into Firestore * * Usage: * 1. Export CSV from GA4: Reports → Engagement → Pages and screens → Download CSV * 2. Get Firebase service account key from Firebase Console → Project Settings → Service accounts * 3. Run: node seed-firebase-views.js * * The script maps GA page paths to Blowfish document IDs: * /docs/configuration/ → views_docs-configuration-index.md */ import { initializeApp, cert } from 'firebase-admin/app'; import { getFirestore } from 'firebase-admin/firestore'; import { createReadStream, existsSync, readFileSync } from 'fs'; import { parse } from 'csv-parse'; import { dirname, join } from 'path'; import { fileURLToPath } from 'url'; const __dirname = dirname(fileURLToPath(import.meta.url)); // Configuration const SERVICE_ACCOUNT_PATH = join(__dirname, 'serviceAccountKey.json'); const COLLECTION_NAME = 'views'; const DRY_RUN = process.argv.includes('--dry-run'); const FORCE = process.argv.includes('--force'); // Overwrite existing documents // Check for required files if (!existsSync(SERVICE_ACCOUNT_PATH)) { console.error('Error: serviceAccountKey.json not found in scripts directory'); console.error('Download it from Firebase Console → Project Settings → Service accounts'); process.exit(1); } const csvPath = process.argv.find(arg => !arg.startsWith('--') && !arg.includes('node') && !arg.includes('seed-firebase-views')); if (!csvPath) { console.error('Usage: node seed-firebase-views.js [options]'); console.error(''); console.error('Options:'); console.error(' --dry-run Preview what would be imported without writing to Firestore'); console.error(' --force Overwrite existing documents (useful for re-importing)'); process.exit(1); } if (!existsSync(csvPath)) { console.error(`Error: CSV file not found: ${csvPath}`); process.exit(1); } // Initialize Firebase Admin const serviceAccount = JSON.parse(readFileSync(SERVICE_ACCOUNT_PATH, 'utf8')); initializeApp({ credential: cert(serviceAccount) }); const db = getFirestore(); // Language prefixes used in Hugo multilingual setup // These get stripped from URLs to normalize to the base path const LANGUAGE_PREFIXES = ['en', 'fr', 'zh-cn', 'es', 'de', 'it', 'ja', 'pt-pt', 'pt-br', 'bg', 'bn', 'fa', 'he', 'hu', 'id', 'pl', 'ro', 'ru', 'th', 'tr', 'uk', 'vi', 'zh-tw']; /** * Strip language prefix from a URL path * /pt-pt/docs/configuration/ → /docs/configuration/ * /docs/configuration/ → /docs/configuration/ (unchanged) */ function stripLanguagePrefix(pagePath) { for (const lang of LANGUAGE_PREFIXES) { const prefix = `/${lang}/`; if (pagePath.startsWith(prefix)) { return '/' + pagePath.slice(prefix.length); } // Also handle case where lang is at root like /en or /pt-pt if (pagePath === `/${lang}` || pagePath === `/${lang}/`) { return '/'; } } return pagePath; } /** * Convert a GA page path to a Blowfish Firestore document ID * * Blowfish uses .File.Path which includes "index.md" for page bundles: * GA path: /docs/configuration/ * Hugo file: docs/configuration/index.md * Firestore ID: views_docs-configuration-index.md */ function pathToDocId(pagePath) { // First strip any language prefix pagePath = stripLanguagePrefix(pagePath); // Remove leading/trailing slashes let cleanPath = pagePath.replace(/^\/|\/$/g, ''); // Handle special cases if (cleanPath === '' || cleanPath === '/') { // Homepage: _index.md return 'views__index.md'; } // For section pages like /docs/, the file is docs/_index.md // For article pages like /docs/configuration/, the file is docs/configuration/index.md // We can tell them apart: section pages don't have a second path segment after the section const parts = cleanPath.split('/'); if (parts.length === 1) { // Section page like "docs" → docs/_index.md cleanPath = `${cleanPath}-_index.md`; } else { // Article page like "docs/configuration" → docs/configuration/index.md → docs-configuration-index.md cleanPath = cleanPath.replace(/\//g, '-') + '-index.md'; } // Replace any remaining slashes with hyphens cleanPath = cleanPath.replace(/\//g, '-'); return `views_${cleanPath}`; } /** * Parse the GA4 CSV export * GA4 exports have metadata lines starting with # before the actual CSV data * Aggregates views from different language URLs that map to the same document */ async function parseGACSV(filePath) { // Use a Map to aggregate views by document ID const viewsByDocId = new Map(); // Track original paths for debugging const pathsByDocId = new Map(); return new Promise((resolve, reject) => { createReadStream(filePath) .pipe(parse({ columns: true, skip_empty_lines: true, trim: true, relax_column_count: true, comment: '#' // Skip lines starting with # })) .on('data', (row) => { // GA4 uses various column names depending on the report const pagePath = row['Page path'] || row['Page path and screen class'] || row['Page'] || row['Landing page']; const views = row['Views'] || row['Pageviews'] || row['Sessions'] || row['Screen views']; if (pagePath && views) { // Clean up the path and parse views const cleanPath = pagePath.trim(); const viewCount = parseInt(views.replace(/,/g, ''), 10); // Only include valid paths (skip query strings, fragments, etc.) if (cleanPath.startsWith('/') && !isNaN(viewCount) && viewCount > 0) { // Strip language prefix to get normalized path const normalizedPath = stripLanguagePrefix(cleanPath); // Check if this is a page we want to import // Include docs, samples, and other sections relevant to blowfish.page const validPrefixes = ['/docs/', '/samples/', '/users/', '/contributors/', '/tags/', '/authors/']; const isValidPath = normalizedPath === '/' || validPrefixes.some(p => normalizedPath.startsWith(p)) || validPrefixes.some(p => normalizedPath === p.slice(0, -1)); // /docs without trailing slash if (isValidPath) { const docId = pathToDocId(cleanPath); // Take the MAX views (not sum) - handles duplicate URLs like with/without trailing slash const existingViews = viewsByDocId.get(docId) || 0; viewsByDocId.set(docId, Math.max(existingViews, viewCount)); // Track paths for debugging const existingPaths = pathsByDocId.get(docId) || []; existingPaths.push({ path: cleanPath, views: viewCount }); pathsByDocId.set(docId, existingPaths); } } } }) .on('end', () => { // Convert Map to array const results = []; for (const [docId, totalViews] of viewsByDocId) { const paths = pathsByDocId.get(docId); results.push({ docId, views: totalViews, sourcePaths: paths // For debugging - shows which URLs were aggregated }); } // Sort by views descending results.sort((a, b) => b.views - a.views); resolve(results); }) .on('error', reject); }); } /** * Import data to Firestore */ async function importToFirestore(data) { console.log(`\nImporting ${data.length} documents to Firestore...`); if (FORCE) { console.log('FORCE mode: will overwrite existing documents\n'); } // Use batched writes for efficiency (max 500 per batch) const batchSize = 500; let imported = 0; let updated = 0; let skipped = 0; for (let i = 0; i < data.length; i += batchSize) { const batch = db.batch(); const chunk = data.slice(i, i + batchSize); for (const item of chunk) { const docRef = db.collection(COLLECTION_NAME).doc(item.docId); // Check if document already exists const existing = await docRef.get(); if (existing.exists) { if (FORCE) { batch.set(docRef, { views: item.views }); updated++; } else { console.log(` Skipping ${item.docId} (already exists with ${existing.data().views} views)`); skipped++; continue; } } else { batch.set(docRef, { views: item.views }); imported++; } } if (!DRY_RUN) { await batch.commit(); } console.log(` Processed ${Math.min(i + batchSize, data.length)}/${data.length}`); } return { imported, updated, skipped }; } // Main execution async function main() { console.log('Google Analytics to Firestore Import'); console.log('===================================='); if (DRY_RUN) { console.log('DRY RUN MODE - No changes will be made'); } if (FORCE) { console.log('FORCE MODE - Will overwrite existing documents'); } console.log(''); console.log(`Reading CSV: ${csvPath}`); const data = await parseGACSV(csvPath); if (data.length === 0) { console.error('\nNo valid data found in CSV. Check the file format.'); console.error('Expected columns: "Page path" (or similar) and "Views" (or similar)'); process.exit(1); } console.log(`\nFound ${data.length} unique pages to import:`); console.log(''); // Show preview with aggregation info const preview = data.slice(0, 15); for (const item of preview) { console.log(` ${item.docId}`); console.log(` Views: ${item.views.toLocaleString()}`); if (item.sourcePaths.length > 1) { console.log(` (max from ${item.sourcePaths.length} URL variants):`); for (const sp of item.sourcePaths.slice(0, 3)) { console.log(` - ${sp.path}: ${sp.views.toLocaleString()}`); } if (item.sourcePaths.length > 3) { console.log(` ... and ${item.sourcePaths.length - 3} more`); } } } if (data.length > 15) { console.log(` ... and ${data.length - 15} more`); } // Show total views to verify parsing const totalViews = data.reduce((sum, item) => sum + item.views, 0); console.log(`\nTotal views across all pages: ${totalViews.toLocaleString()}`) if (DRY_RUN) { console.log('\n✓ Dry run complete. Run without --dry-run to import.'); return; } const { imported, updated, skipped } = await importToFirestore(data); console.log('\n===================================='); console.log(`✓ Import complete!`); console.log(` New documents: ${imported}`); if (updated > 0) { console.log(` Updated (--force): ${updated}`); } if (skipped > 0) { console.log(` Skipped (already exist): ${skipped}`); } } main().catch((error) => { console.error('Error:', error.message); process.exit(1); });