#!/usr/bin/env node
// buildpath/dev/scripts/process-matches.js
import { MongoClient, ObjectId } from 'mongodb';
import fs, { createReadStream } from 'fs';
import { createInterface } from 'readline';
import path from 'path';
import { pathToFileURL } from 'url';
/**
 * Stream-based import of large line-delimited JSON files.
 * Optimized for 9 GB+ files with minimal memory usage: lines are read one
 * at a time and flushed to MongoDB in fixed-size batches.
 *
 * @param {string} filePath       Path to a newline-delimited JSON file
 * @param {string} collectionName Target collection in the `matches` database
 * @param {number} [batchSize]    Documents per insertMany call (default 1000)
 */
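// Expected input: one match document per JSON line. A minimal sketch of the
// fields this script actually relies on (everything else passes through
// unchanged; the exact shape depends on the dump, and the values below are
// illustrative):
//
//   {"_id":{"$oid":"..."},
//    "metadata":{"matchId":"EUW1_1234567890"},
//    "info":{"gameDuration":1800,
//            "participants":[{"championId":103,"win":true}]}}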
async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
  console.log(`  📁 File: ${filePath}`);
  console.log(`  📦 Collection: ${collectionName}`);
  console.log(`  🔄 Batch Size: ${batchSize}`);

  const startTime = Date.now();
  let processed = 0;
  let skipped = 0;

  // Connect to MongoDB. Note that the database name in the URI is ignored:
  // documents always land in the `matches` database.
  const client = new MongoClient(process.env.MONGO_URI || 'mongodb://root:password@localhost:27017/buildpath?authSource=admin');
  await client.connect();
  const db = client.db('matches');
  const collection = db.collection(collectionName);
  try {
    // Create indexes up front for query performance; the unique index on
    // metadata.matchId also makes re-imports of duplicate matches fail fast
    // (surfacing as error code 11000 in insertBatch below)
    await collection.createIndex({ "metadata.matchId": 1 }, { unique: true });
    await collection.createIndex({ "info.gameDuration": 1 });
    await collection.createIndex({ "info.participants.championId": 1 });
    await collection.createIndex({ "info.participants.win": 1 });

    // Report file size before starting the long-running import
    const fileStats = fs.statSync(filePath);
    const fileSize = (fileStats.size / (1024 * 1024 * 1024)).toFixed(2);
    console.log(`  📊 File size: ${fileSize} GB`);

    await processLineDelimitedFormat(filePath, collection, batchSize);

    const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
    console.log(`🎉 Import complete in ${totalTime} seconds`);
    console.log(`✅ Processed: ${processed.toLocaleString()} matches`);
    if (skipped > 0) {
      console.log(`⚠️ Skipped: ${skipped.toLocaleString()} invalid entries`);
    }
  } catch (error) {
    console.error('❌ Import failed:', error);
    // Rethrow instead of calling process.exit() here: exiting would skip the
    // finally block and leave the client connection open. The CLI wrapper at
    // the bottom of the file sets the exit code.
    throw error;
  } finally {
    await client.close();
  }
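
  // The helpers below are nested inside importLargeJsonFile on purpose:
  // they update the processed/skipped counters of the enclosing scope
  // through closure. Function declarations hoist, so the call above works.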
  async function processLineDelimitedFormat(filePath, collection, batchSize) {
    const fileStream = createReadStream(filePath);
    const rl = createInterface({
      input: fileStream,
      crlfDelay: Infinity
    });

    let batch = [];
    let lineCount = 0;

    for await (const line of rl) {
      lineCount++;
      // Throttle progress output: rewriting the status line for every one of
      // the millions of lines in a 9 GB file measurably slows the import
      if (lineCount % 10000 === 0) {
        process.stdout.write(`\r  Processing line ${lineCount.toLocaleString()}... `);
      }

      try {
        if (line.trim() === '') continue;

        const match = JSON.parse(line);

        // A match without an id cannot satisfy the unique index; skip it
        if (!match.metadata || !match.metadata.matchId) {
          skipped++;
          continue;
        }

        // Convert Extended JSON $oid fields to proper ObjectId instances
        if (match._id && match._id.$oid) {
          match._id = new ObjectId(match._id.$oid);
        }

        batch.push(match);
        processed++; // Counted at parse time; duplicates dropped by the
                     // unique index at insert time are still included

        if (batch.length >= batchSize) {
          process.stdout.write(`\r  Inserting batch into MongoDB... `);
          await insertBatch(batch, collection);
          batch = [];
        }
      } catch (error) {
        // Malformed JSON on this line
        skipped++;
      }
    }

    // Insert whatever is left in the final, partially filled batch
    if (batch.length > 0) {
      await insertBatch(batch, collection);
    }
  }
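
  // Variant for gzipped dumps: readline accepts any Readable stream, so a
  // compressed file can be decompressed on the fly with Node's built-in zlib
  // instead of being unpacked to disk first. A minimal sketch (hypothetical
  // helper, not wired into this script; pass its return value to
  // createInterface in place of the plain file stream):
  //
  //   import { createGunzip } from 'zlib';
  //
  //   function openMatchStream(filePath) {
  //     const raw = createReadStream(filePath);
  //     return filePath.endsWith('.gz') ? raw.pipe(createGunzip()) : raw;
  //   }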
  async function insertBatch(batch, collection) {
    if (batch.length === 0) return;

    try {
      const result = await collection.insertMany(batch, {
        ordered: false,        // Continue past individual document failures
        writeConcern: { w: 1 } // Acknowledge writes
      });
      return result;
    } catch (error) {
      if (error.code === 11000) {
        // Duplicate matchIds rejected by the unique index; with
        // ordered: false the rest of the batch was still inserted
        return;
      }
      console.error(`❌ Batch insert error: ${error.message}`);
      throw error;
    }
  }
}
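
// Sanity-checking an import afterwards (mongosh; the collection name is
// whatever was passed on the command line, e.g. "16.1.1" -- champion id 103
// below is just an illustrative value):
//
//   use matches
//   db.getCollection('16.1.1').countDocuments()
//   db.getCollection('16.1.1').findOne({ 'info.participants.championId': 103 })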
// Run the import if this file is executed directly. Comparing against
// pathToFileURL(process.argv[1]) is more robust than string-building a
// file:// URL by hand (it survives relative paths and Windows separators).
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const args = process.argv.slice(2);

  if (args.length < 2) {
    console.log('Usage: node process-matches.js <file-path> <collection-name> [batch-size]');
    console.log('Example: node process-matches.js ../data/16_1_1_matches.json 16.1.1 1000');
    process.exit(1);
  }

  const filePath = path.resolve(args[0]);
  const collectionName = args[1];
  const batchSize = args[2] ? parseInt(args[2], 10) : 1000;

  importLargeJsonFile(filePath, collectionName, batchSize)
    .then(() => process.exit(0))
    .catch((error) => {
      console.error('Import failed:', error);
      process.exit(1);
    });
}
export { importLargeJsonFile };
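
// Example programmatic use from another script (hypothetical path and
// collection name; MONGO_URI must point at your instance):
//
//   import { importLargeJsonFile } from './process-matches.js';
//   await importLargeJsonFile('./data/16_1_1_matches.json', '16.1.1', 2000);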