#!/usr/bin/env node

const { MongoClient, ObjectId } = require('mongodb');
const fs = require('fs');
const { createReadStream } = require('fs');
const { createInterface } = require('readline');
const path = require('path');

/**
 * Stream-based import of large JSON files
 * Optimized for 9GB+ files with minimal memory usage
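 *
 * @param {string} filePath - path to a newline-delimited JSON file of match documents
 * @param {string} collectionName - target collection in the `matches` database
 * @param {number} [batchSize=1000] - documents per insertMany() call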
 */
async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
  console.log(` 📁 File: ${filePath}`);
  console.log(` 📦 Collection: ${collectionName}`);
  console.log(` 🔄 Batch Size: ${batchSize}`);

  const startTime = Date.now();
  let processed = 0;
  let skipped = 0;

  // Connect to MongoDB
  const client = new MongoClient(process.env.MONGO_URI || 'mongodb://root:password@localhost:27017/buildpath?authSource=admin');
  await client.connect();

  const db = client.db('matches');
  const collection = db.collection(collectionName);

  try {
    // Create indexes first for better performance
    await collection.createIndex({ "metadata.matchId": 1 }, { unique: true });
    await collection.createIndex({ "info.gameDuration": 1 });
    await collection.createIndex({ "info.participants.championId": 1 });
    await collection.createIndex({ "info.participants.win": 1 });
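    // Note: the unique index on metadata.matchId makes a re-imported match fail
    // with a duplicate-key error (code 11000), which insertBatch() below treats
    // as already-present data and skips.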

    // Check file size
    const fileStats = fs.statSync(filePath);
    const fileSize = (fileStats.size / (1024 * 1024 * 1024)).toFixed(2);
    console.log(` 📊 File size: ${fileSize} GB`);

    await processLineDelimitedFormat(filePath, collection, batchSize, startTime);

    const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
    console.log(`🎉 Import complete in ${totalTime} seconds`);
    console.log(`✅ Processed: ${processed.toLocaleString()} matches`);
    if (skipped > 0) {
      console.log(`⚠️ Skipped: ${skipped.toLocaleString()} invalid entries`);
    }
  } catch (error) {
    console.error('❌ Import failed:', error);
    process.exit(1);
  } finally {
    await client.close();
  }
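
  // The helpers below are declared inside importLargeJsonFile so they can
  // update the outer processed/skipped counters through the closure.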
  async function processLineDelimitedFormat(filePath, collection, batchSize, startTime) {
    const fileStream = createReadStream(filePath);
    const rl = createInterface({
      input: fileStream,
      crlfDelay: Infinity
    });
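
    // readline yields the file one line at a time, so memory use stays roughly
    // at one line plus the current batch; crlfDelay: Infinity treats \r\n as a
    // single line break.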
    let batch = [];
    let lineCount = 0;

    for await (const line of rl) {
      lineCount++;
      process.stdout.write(`\r Processing line ${lineCount.toLocaleString()}... `);

      try {
        if (line.trim() === '') continue;

        const match = JSON.parse(line);
        if (!match.metadata || !match.metadata.matchId) {
          skipped++;
          continue;
        }

        // Convert $oid fields to proper ObjectId format
        if (match._id && match._id.$oid) {
          match._id = new ObjectId(match._id.$oid);
        }

        batch.push(match);
        processed++; // count matches queued for insertion (reported in the final summary)

        if (batch.length >= batchSize) {
          process.stdout.write(`\r Inserting batch into MongoDB... `);
          await insertBatch(batch, collection);
          batch = [];
        }
      } catch (error) {
        // Malformed JSON (or any other per-line failure) is counted and skipped
        skipped++;
      }
    }

    // Insert remaining matches
    if (batch.length > 0) {
      await insertBatch(batch, collection);
    }
  }

  async function insertBatch(batch, collection) {
    if (batch.length === 0) return;

    try {
      const result = await collection.insertMany(batch, {
        ordered: false, // Continue on errors
        writeConcern: { w: 1 } // Acknowledge writes
      });

      return result;
    } catch (error) {
      if (error.code === 11000) {
        // Duplicate matches - skip
        return;
      }
      console.error(`❌ Batch insert error: ${error.message}`);
      throw error;
    }
  }
}

// Run the import if called directly
if (require.main === module) {
  const args = process.argv.slice(2);
  if (args.length < 2) {
    console.log('Usage: node process-matches.js <file-path> <collection-name> [batch-size]');
    console.log('Example: node process-matches.js ../data/16_1_1_matches.json 16.1.1 1000');
    process.exit(1);
  }

  const filePath = path.resolve(args[0]);
  const collectionName = args[1];
  const batchSize = args[2] ? parseInt(args[2]) : 1000;

  importLargeJsonFile(filePath, collectionName, batchSize)
    .then(() => process.exit(0))
    .catch((error) => {
      console.error('Import failed:', error);
      process.exit(1);
    });
}

module.exports = { importLargeJsonFile };