/**
 * A node of an item prefix tree.
 *
 * - `data` is the item stored at this node; the root created by `treeInit`
 *   carries `undefined`.
 * - `count` is the number of merged paths that went through this node.
 * - `children` are the nodes for the items that followed this one.
 */
type ItemTree = {
  data: number | undefined
  count: number
  children: Array<ItemTree>
}

/**
 * Create an empty root: no item, a count of 0 and no children.
 */
function treeInit(): ItemTree {
  return { data: undefined, count: 0, children: [] }
}

/**
 * Create a childless node holding `data` with the given `count`.
 */
function treeNode(data: number | undefined, count: number): ItemTree {
  return { data, count, children: [] }
}

/**
 * Merge a single node into one level of an item tree.
 *
 * If `itemtree` already has a direct child with the same `data`, that child's
 * count is increased by `node.count`; otherwise a fresh childless node is
 * appended. Children of `node` are ignored.
 *
 * @returns the direct child of `itemtree` that now represents `node.data`
 */
function nodeMerge(itemtree: ItemTree, node: ItemTree): ItemTree {
  const { data, count } = node

  const existing = itemtree.children.find(child => child.data === data)
  if (existing !== undefined) {
    existing.count += count
    return existing
  }

  const created = treeNode(data, count)
  itemtree.children.push(created)
  return created
}

/**
 * Merge a full build path with an existing item tree.
 *
 * Walks `items` in order, descending one level per item and adding 1 to the
 * count of every node on the way (creating missing nodes). The count of
 * `itemtree` itself is not modified.
 */
function treeMerge(itemtree: ItemTree, items: ReadonlyArray<number>): void {
  let current = itemtree
  for (const item of items) {
    current = nodeMerge(current, treeNode(item, 1))
  }
}

/**
 * Prune an item tree in place, level by level.
 *
 * At every node:
 * 1. while there are more than `thresholdCount` children, the child with the
 *    lowest count is removed (the earliest one on ties);
 * 2. every remaining child whose `count / parent.count` ratio is strictly
 *    below `thresholdPerc` is removed;
 * 3. the surviving children are pruned recursively with the same thresholds.
 *
 * The ratio uses the parent's own `count`, so a parent whose count is 0
 * yields `Infinity`/`NaN` ratios and loses no child in step 2.
 */
function treeCutBranches(itemtree: ItemTree, thresholdCount: number, thresholdPerc: number): void {
  const children = itemtree.children

  // A negative limit can never be reached by removing children; clamp it so
  // the loop below stops once the list is empty instead of spinning forever.
  const maxChildren = Math.max(0, thresholdCount)

  // Remove the least used branches until the limit is respected
  while (children.length > maxChildren) {
    let leastUsedIndex = 0
    let leastUsedCount = Infinity
    children.forEach((child, index) => {
      if (child.count < leastUsedCount) {
        leastUsedCount = child.count
        leastUsedIndex = index
      }
    })
    children.splice(leastUsedIndex, 1)
  }

  // Remove branches that are of too low usage. The array is rewritten in
  // place so that outside references to `children` observe the pruning.
  const kept = children.filter(child => !(child.count / itemtree.count < thresholdPerc))
  children.splice(0, children.length, ...kept)

  for (const child of children) {
    treeCutBranches(child, thresholdCount, thresholdPerc)
  }
}

/**
 * Sort every level of the tree in place, most used children first.
 */
function treeSort(itemtree: ItemTree): void {
  itemtree.children.sort((a, b) => b.count - a.count)
  for (const child of itemtree.children) {
    treeSort(child)
  }
}

/**
 * Deep clone an ItemTree: the copy shares no node with the original.
 */
function treeClone(tree: ItemTree): ItemTree {
  return {
    data: tree.data,
    count: tree.count,
    children: tree.children.map(child => treeClone(child))
  }
}

/**
 * Merge two ItemTrees into one.
 *
 * `t2` is folded into `t1`, which is mutated and returned; `t2` is left
 * untouched. Counts of nodes found at the same position with the same `data`
 * are summed; subtrees that exist only in `t2` are deep-copied into `t1`.
 */
function treeMergeTree(t1: ItemTree, t2: ItemTree): ItemTree {
  // Merge counts for the root
  t1.count += t2.count

  for (const child2 of t2.children) {
    const matchingChild = t1.children.find(child1 => child1.data === child2.data)
    if (matchingChild !== undefined) {
      // Same item on both sides: merge the two subtrees recursively
      treeMergeTree(matchingChild, child2)
    } else {
      // Copy rather than share, so later edits to t1 never leak into t2
      t1.children.push(treeClone(child2))
    }
  }

  return t1
}

/**
 * Flatten an ItemTree into the Set of item numbers found anywhere in it.
 * Nodes without an item (`data === undefined`) contribute nothing.
 */
function treeToSet(itemtree: ItemTree): Set<number> {
  const items = new Set<number>()

  function traverse(node: ItemTree): void {
    if (node.data !== undefined) {
      items.add(node.data)
    }
    for (const child of node.children) {
      traverse(child)
    }
  }

  traverse(itemtree)
  return items
}

/**
 * Calculate similarity between two trees, compared as item sets.
 *
 * Returns a number between 0 and 1 using the overlap coefficient
 * |A ∩ B| / min(|A|, |B|). This is not the Jaccard index: when one item set
 * is included in the other the result is exactly 1, whatever their sizes.
 *
 * Special cases: two trees without any item are identical (1); a tree
 * without items compared with a non-empty one scores 0.
 */
function areTreeSimilars(t1: ItemTree, t2: ItemTree): number {
  const set1 = treeToSet(t1)
  const set2 = treeToSet(t2)

  // Handle empty sets explicitly; the ratio below would be 0 / 0 otherwise
  if (set1.size === 0 && set2.size === 0) {
    return 1.0
  }
  if (set1.size === 0 || set2.size === 0) {
    return 0
  }

  // Counting from the smaller set keeps the number of lookups minimal
  const smaller = set1.size <= set2.size ? set1 : set2
  const larger = smaller === set1 ? set2 : set1

  let shared = 0
  smaller.forEach(item => {
    if (larger.has(item)) {
      shared += 1
    }
  })

  // shared <= smaller.size, so the ratio is already within [0, 1]
  return shared / smaller.size
}

export { ItemTree, treeMerge, treeInit, treeCutBranches, treeSort, treeMergeTree, areTreeSimilars }