feat/match_collector: batch match import
This commit is contained in:
@@ -31,19 +31,29 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
|
||||
const collection = db.collection(collectionName);
|
||||
|
||||
try {
|
||||
// Create indexes first for better performance
|
||||
await collection.createIndex({ "metadata.matchId": 1 }, { unique: true });
|
||||
await collection.createIndex({ "info.gameDuration": 1 });
|
||||
await collection.createIndex({ "info.participants.championId": 1 });
|
||||
await collection.createIndex({ "info.participants.win": 1 });
|
||||
|
||||
// Check file size
|
||||
// Check file size first
|
||||
const fileStats = fs.statSync(filePath);
|
||||
const fileSize = (fileStats.size / (1024 * 1024 * 1024)).toFixed(2);
|
||||
console.log(` 📊 File size: ${fileSize} GB`);
|
||||
|
||||
// Defer index creation to after import to reduce memory pressure
|
||||
// Only create the unique matchId index before import to prevent duplicates
|
||||
console.log(` 📇 Creating unique matchId index...`);
|
||||
await collection.createIndex({ "metadata.matchId": 1 }, { unique: true, background: false });
|
||||
|
||||
await processLineDelimitedFormat(filePath, collection, batchSize, startTime);
|
||||
|
||||
// Create additional indexes after import to reduce memory pressure
|
||||
console.log(`\n 📇 Creating additional indexes (this may take a while)...`);
|
||||
try {
|
||||
await collection.createIndex({ "info.gameDuration": 1 }, { background: true });
|
||||
await collection.createIndex({ "info.participants.championId": 1 }, { background: true });
|
||||
await collection.createIndex({ "info.participants.win": 1 }, { background: true });
|
||||
console.log(` ✅ Indexes created successfully`);
|
||||
} catch (indexError) {
|
||||
console.log(` ⚠️ Warning: Could not create additional indexes: ${indexError.message}`);
|
||||
}
|
||||
|
||||
const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
console.log(`🎉 Import complete in ${totalTime} seconds`);
|
||||
console.log(`✅ Processed: ${processed.toLocaleString()} matches`);
|
||||
@@ -66,6 +76,7 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
|
||||
|
||||
let batch = [];
|
||||
let lineCount = 0;
|
||||
let batchCount = 0;
|
||||
|
||||
for await (const line of rl) {
|
||||
lineCount++;
|
||||
@@ -88,9 +99,16 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
|
||||
batch.push(match);
|
||||
|
||||
if (batch.length >= batchSize) {
|
||||
process.stdout.write(`\r Inserting batch into MongoDB... `);
|
||||
batchCount++;
|
||||
process.stdout.write(`\r Inserting batch #${batchCount} (${batch.length} matches)... `);
|
||||
await insertBatch(batch, collection);
|
||||
batch = [];
|
||||
|
||||
// Yield to the event loop every 10 batches (setImmediate does not force garbage collection; it only lets queued callbacks run)
|
||||
// This helps reduce memory pressure when processing large files
|
||||
if (batchCount % 10 === 0) {
|
||||
await new Promise(resolve => setImmediate(resolve));
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
skipped++;
|
||||
@@ -99,8 +117,11 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
|
||||
|
||||
// Insert remaining matches
|
||||
if (batch.length > 0) {
|
||||
process.stdout.write(`\r Inserting final batch (${batch.length} matches)... `);
|
||||
await insertBatch(batch, collection);
|
||||
}
|
||||
|
||||
console.log(`\n 📊 Total batches inserted: ${batchCount + 1}`);
|
||||
}
|
||||
|
||||
async function insertBatch(batch, collection) {
|
||||
|
||||
Reference in New Issue
Block a user