feat/match_collector: batch match import

2026-06-27 12:33:36 +02:00
parent e6ddc27d5c
commit d878af6d1a
3 changed files with 118 additions and 23 deletions
@@ -31,19 +31,29 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
  const collection = db.collection(collectionName);

  try {
-    // Create indexes first for better performance
-    await collection.createIndex({ "metadata.matchId": 1 }, { unique: true });
-    await collection.createIndex({ "info.gameDuration": 1 });
-    await collection.createIndex({ "info.participants.championId": 1 });
-    await collection.createIndex({ "info.participants.win": 1 });
-
-    // Check file size
+    // Check file size first
    const fileStats = fs.statSync(filePath);
    const fileSize = (fileStats.size / (1024 * 1024 * 1024)).toFixed(2);
    console.log(`  📊 File size: ${fileSize} GB`);

+    // Defer the secondary indexes until after the import to reduce memory pressure.
+    // Create only the unique matchId index up front so duplicate matches are rejected during import.
+    console.log(`  📇 Creating unique matchId index...`);
+    await collection.createIndex({ "metadata.matchId": 1 }, { unique: true, background: false });
+
    await processLineDelimitedFormat(filePath, collection, batchSize, startTime);

+    // Create additional indexes after import to reduce memory pressure
+    console.log(`\n  📇 Creating additional indexes (this may take a while)...`);
+    try {
+      await collection.createIndex({ "info.gameDuration": 1 }, { background: true });
+      await collection.createIndex({ "info.participants.championId": 1 }, { background: true });
+      await collection.createIndex({ "info.participants.win": 1 }, { background: true });
+      console.log(`  ✅ Indexes created successfully`);
+    } catch (indexError) {
+      console.log(`  ⚠️ Warning: Could not create additional indexes: ${indexError.message}`);
+    }
+
    const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
    console.log(`🎉 Import complete in ${totalTime} seconds`);
    console.log(`✅ Processed: ${processed.toLocaleString()} matches`);
@@ -66,6 +76,7 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {

    let batch = [];
    let lineCount = 0;
+    let batchCount = 0;

    for await (const line of rl) {
      lineCount++;
@@ -88,9 +99,16 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {
          batch.push(match);

        if (batch.length >= batchSize) {
-          process.stdout.write(`\r  Inserting batch into MongoDB...    `);
+          batchCount++;
+          process.stdout.write(`\r  Inserting batch #${batchCount} (${batch.length} matches)...    `);
          await insertBatch(batch, collection);
          batch = [];
+          
+          // Yield to the event loop every 10 batches so queued callbacks get a chance to run.
+          // Note: this does not force or request garbage collection; it only pauses the loop for one tick.
+          if (batchCount % 10 === 0) {
+            await new Promise(resolve => setImmediate(resolve));
+          }
        }
      } catch (error) {
        skipped++;
@@ -99,8 +117,11 @@ async function importLargeJsonFile(filePath, collectionName, batchSize = 1000) {

    // Insert remaining matches
    if (batch.length > 0) {
+      process.stdout.write(`\r  Inserting final batch (${batch.length} matches)...    `);
      await insertBatch(batch, collection);
    }
+    
+    console.log(`\n  📊 Total batches inserted: ${batchCount + 1}`);
  }

  async function insertBatch(batch, collection) {