#!/usr/bin/env node

const { MongoClient, ObjectId } = require('mongodb');
const fs = require('fs');
const { createReadStream } = require('fs');
const { createInterface } = require('readline');
const path = require('path');

/**
 * Stream-based import of large, line-delimited JSON files into MongoDB.
 * Optimized for 9GB+ files with minimal memory usage: the file is read
 * line by line and documents are inserted in batches.
 */
async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
  console.log(`  📁 File: ${filePath}`);
  console.log(`  📦 Collection: ${collectionName}`);
  console.log(`  🔄 Batch Size: ${batchSize}`);

  const startTime = Date.now();
  let processed = 0;
  let skipped = 0;

  // Connect to MongoDB
  const client = new MongoClient(
    process.env.MONGO_URI ||
      'mongodb://root:password@localhost:27017/buildpath?authSource=admin'
  );
  await client.connect();
  const db = client.db('matches');
  const collection = db.collection(collectionName);

  try {
    // Create indexes first for better query performance after the import
    await collection.createIndex({ 'metadata.matchId': 1 }, { unique: true });
    await collection.createIndex({ 'info.gameDuration': 1 });
    await collection.createIndex({ 'info.participants.championId': 1 });
    await collection.createIndex({ 'info.participants.win': 1 });

    // Report the file size before starting
    const fileStats = fs.statSync(filePath);
    const fileSize = (fileStats.size / (1024 * 1024 * 1024)).toFixed(2);
    console.log(`  📊 File size: ${fileSize} GB`);

    await processLineDelimitedFormat(filePath, collection, batchSize, startTime);

    const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
    console.log(`🎉 Import complete in ${totalTime} seconds`);
    console.log(`✅ Processed: ${processed.toLocaleString()} matches`);
    if (skipped > 0) {
      console.log(`⚠️ Skipped: ${skipped.toLocaleString()} invalid entries`);
    }
  } catch (error) {
    console.error('❌ Import failed:', error);
    process.exit(1);
  } finally {
    await client.close();
  }

  async function processLineDelimitedFormat(filePath, collection, batchSize, startTime) {
    const fileStream = createReadStream(filePath);
    const rl = createInterface({ input: fileStream, crlfDelay: Infinity });

    let batch = [];
    let lineCount = 0;

    for await (const line of rl) {
      lineCount++;
      process.stdout.write(`\r  Processing line ${lineCount.toLocaleString()}... `);

      try {
        if (line.trim() === '') continue;

        const match = JSON.parse(line);

        // Skip documents that are missing the required match identifier
        if (!match.metadata || !match.metadata.matchId) {
          skipped++;
          continue;
        }

        // Convert Extended JSON $oid fields to proper ObjectId instances
        if (match._id && match._id.$oid) {
          match._id = new ObjectId(match._id.$oid);
        }

        batch.push(match);

        if (batch.length >= batchSize) {
          process.stdout.write(`\r  Inserting batch into MongoDB... `);
          await insertBatch(batch, collection);
          processed += batch.length;
          batch = [];
        }
      } catch (error) {
        // Malformed JSON lines are counted and skipped
        skipped++;
      }
    }

    // Insert any remaining matches that did not fill a full batch
    if (batch.length > 0) {
      await insertBatch(batch, collection);
      processed += batch.length;
    }
  }

  async function insertBatch(batch, collection) {
    if (batch.length === 0) return;

    try {
      const result = await collection.insertMany(batch, {
        ordered: false,          // Continue past individual document errors
        writeConcern: { w: 1 }   // Acknowledge writes
      });
      return result;
    } catch (error) {
      if (error.code === 11000) {
        // Duplicate matches - skip
        return;
      }
      console.error(`❌ Batch insert error: ${error.message}`);
      throw error;
    }
  }
}

// Run the import if called directly
if (require.main === module) {
  const args = process.argv.slice(2);

  if (args.length < 2) {
    console.log('Usage: node process-matches.js <file-path> <collection-name> [batch-size]');
    console.log('Example: node process-matches.js ../data/16_1_1_matches.json 16.1.1 1000');
    process.exit(1);
  }

  const filePath = path.resolve(args[0]);
  const collectionName = args[1];
  const batchSize = args[2] ? parseInt(args[2], 10) : 1000;

  importLargeJsonFile(filePath, collectionName, batchSize)
    .then(() => process.exit(0))
    .catch((error) => {
      console.error('Import failed:', error);
      process.exit(1);
    });
}

module.exports = { importLargeJsonFile };
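
// Programmatic usage: besides the CLI entry point above, the exported function can
// be called from another Node.js script. This is a minimal sketch only; the file
// path and collection name below are hypothetical placeholders, not values used by
// this project.
//
//   const { importLargeJsonFile } = require('./process-matches');
//
//   importLargeJsonFile('./data/matches.jsonl', 'matches_example', 500)
//     .then(() => console.log('Import finished'))
//     .catch((err) => console.error(err));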