@@ -12,6 +12,8 @@ interface BenchmarkResults {
1212 median : number ;
1313 min : number ;
1414 max : number ;
15+ standardDeviation : number ;
16+ confidenceInterval95 : { lower : number ; upper : number } ;
1517 totalDuration : number ;
1618 } ;
1719 documentFormatting : {
@@ -23,6 +25,8 @@ interface BenchmarkResults {
2325 median : number ;
2426 min : number ;
2527 max : number ;
28+ standardDeviation : number ;
29+ confidenceInterval95 : { lower : number ; upper : number } ;
2630 totalDuration : number ;
2731 } ;
2832 } ;
@@ -36,11 +40,11 @@ class PerformanceBenchmark {
3640 private overallStartTime : bigint = BigInt ( 0 ) ;
3741
3842 private readonly baseline = {
39- factoryCreation : { average : 0.0001675 , median : 0.0001522 } ,
43+ factoryCreation : { average : 0.0001675 , median : 0.0001522 , standardDeviation : 0.0001 } ,
4044 documentFormatting : {
41- small : { average : 0.1429 , median : 0.1162 } ,
42- medium : { average : 0.2358 , median : 0.2186 } ,
43- large : { average : 4.2929 , median : 2.2453 }
45+ small : { average : 0.1429 , median : 0.1162 , standardDeviation : 0.05 } ,
46+ medium : { average : 0.2358 , median : 0.2186 , standardDeviation : 0.08 } ,
47+ large : { average : 4.2929 , median : 2.2453 , standardDeviation : 2.0 }
4448 }
4549 } ;
4650
@@ -55,6 +59,8 @@ class PerformanceBenchmark {
5559 median : 0 ,
5660 min : 0 ,
5761 max : 0 ,
62+ standardDeviation : 0 ,
63+ confidenceInterval95 : { lower : 0 , upper : 0 } ,
5864 totalDuration : 0
5965 } ,
6066 documentFormatting : { } ,
@@ -150,6 +156,13 @@ class PerformanceBenchmark {
150156
151157 async benchmarkFactoryCreation ( iterations : number = 100000 ) : Promise < void > {
152158 console . log ( '📦 Testing factory creation overhead...' ) ;
159+
160+ // Warmup phase to mitigate JIT compilation effects
161+ console . log ( ' 🔥 Warming up JIT compiler...' ) ;
162+ for ( let i = 0 ; i < Math . min ( 10000 , iterations / 10 ) ; i ++ ) {
163+ getDocumentPrettyfier ( ) ;
164+ }
165+
153166 const times : number [ ] = [ ] ;
154167 const sectionStart = process . hrtime . bigint ( ) ;
155168
@@ -163,13 +176,23 @@ class PerformanceBenchmark {
163176 const sectionEnd = process . hrtime . bigint ( ) ;
164177 const totalDuration = Number ( sectionEnd - sectionStart ) / 1_000_000 ; // Convert to milliseconds
165178
179+ // Remove outliers for more stable results
180+ const cleanTimes = this . removeOutliers ( times ) ;
181+ console . log ( ` 📊 Removed ${ times . length - cleanTimes . length } outliers (${ ( ( times . length - cleanTimes . length ) / times . length * 100 ) . toFixed ( 1 ) } %)` ) ;
182+
183+ const average = cleanTimes . reduce ( ( a , b ) => a + b , 0 ) / cleanTimes . length ;
184+ const standardDeviation = this . calculateStandardDeviation ( cleanTimes ) ;
185+ const confidenceInterval95 = this . calculateConfidenceInterval ( cleanTimes ) ;
186+
166187 this . results . factoryCreation = {
167188 iterations,
168- times,
169- average : times . reduce ( ( a , b ) => a + b , 0 ) / times . length ,
170- median : this . calculateMedian ( times ) ,
171- min : Math . min ( ...times ) ,
172- max : Math . max ( ...times ) ,
189+ times : cleanTimes ,
190+ average,
191+ median : this . calculateMedian ( cleanTimes ) ,
192+ min : Math . min ( ...cleanTimes ) ,
193+ max : Math . max ( ...cleanTimes ) ,
194+ standardDeviation,
195+ confidenceInterval95,
173196 totalDuration
174197 } ;
175198 }
@@ -184,9 +207,22 @@ class PerformanceBenchmark {
184207 } , { } as Record < string , typeof this . testFiles > ) ;
185208
186209 for ( const [ size , files ] of Object . entries ( sizeGroups ) ) {
210+ console . log ( ` 📋 Testing ${ size } files...` ) ;
187211 const iterations = this . getIterationsForSize ( size as any ) ;
188212 const times : number [ ] = [ ] ;
213+
214+ // Create prettyfier once and reuse
189215 const prettyfier = getDocumentPrettyfier ( ) ;
216+
217+ // Warmup phase - process each file a few times
218+ console . log ( ` 🔥 Warming up with ${ size } files...` ) ;
219+ const warmupIterations = Math . min ( 50 , Math . floor ( iterations / 20 ) ) ;
220+ for ( let i = 0 ; i < warmupIterations ; i ++ ) {
221+ const file = files [ i % files . length ] ;
222+ const document = await this . openDocument ( `resources/${ file . name } -input.md` ) ;
223+ prettyfier . provideDocumentFormattingEdits ( document , { } as any , { } as any ) ;
224+ }
225+
190226 const sectionStart = process . hrtime . bigint ( ) ;
191227
192228 for ( let i = 0 ; i < iterations ; i ++ ) {
@@ -203,14 +239,24 @@ class PerformanceBenchmark {
203239 const sectionEnd = process . hrtime . bigint ( ) ;
204240 const totalDuration = Number ( sectionEnd - sectionStart ) / 1_000_000 ; // Convert to milliseconds
205241
242+ // Remove outliers for more stable results
243+ const cleanTimes = this . removeOutliers ( times ) ;
244+ console . log ( ` 📊 Removed ${ times . length - cleanTimes . length } outliers (${ ( ( times . length - cleanTimes . length ) / times . length * 100 ) . toFixed ( 1 ) } %)` ) ;
245+
246+ const average = cleanTimes . reduce ( ( a , b ) => a + b , 0 ) / cleanTimes . length ;
247+ const standardDeviation = this . calculateStandardDeviation ( cleanTimes ) ;
248+ const confidenceInterval95 = this . calculateConfidenceInterval ( cleanTimes ) ;
249+
206250 this . results . documentFormatting [ size ] = {
207251 files : files . map ( f => f . name ) ,
208252 iterations,
209- times,
210- average : times . reduce ( ( a , b ) => a + b , 0 ) / times . length ,
211- median : this . calculateMedian ( times ) ,
212- min : Math . min ( ...times ) ,
213- max : Math . max ( ...times ) ,
253+ times : cleanTimes ,
254+ average,
255+ median : this . calculateMedian ( cleanTimes ) ,
256+ min : Math . min ( ...cleanTimes ) ,
257+ max : Math . max ( ...cleanTimes ) ,
258+ standardDeviation,
259+ confidenceInterval95,
214260 totalDuration
215261 } ;
216262 }
@@ -224,10 +270,10 @@ class PerformanceBenchmark {
224270
/**
 * Returns how many timed iterations to run for a given document size group.
 *
 * @param size - Size bucket of the test files ('small', 'medium' or 'large').
 * @returns The iteration count for that bucket; any other value falls back
 *          to 15000.
 */
private getIterationsForSize(size: 'small' | 'medium' | 'large'): number {
    if (size === 'small') {
        return 25000;
    }
    if (size === 'medium') {
        return 8000;
    }
    if (size === 'large') {
        return 600;
    }
    // Reachable at runtime: the caller passes `size as any`, so values outside
    // the declared union can arrive here.
    return 15000;
}
233279
@@ -239,6 +285,57 @@ class PerformanceBenchmark {
239285 : sorted [ mid ] ;
240286 }
241287
/**
 * Drops extreme timing samples using IQR fences around the quartiles.
 *
 * The quartiles are read directly from a sorted copy at indices
 * floor(n * 0.25) and floor(n * 0.75) (no interpolation). A sample is kept
 * when it lies within [Q1 - 3 * IQR, Q3 + 3 * IQR]. The input array is never
 * mutated, and the surviving samples keep their original order.
 *
 * @param times - Raw timing samples.
 * @returns The same array instance when it holds fewer than 10 samples,
 *          otherwise a new array containing only the samples inside the fences.
 */
private removeOutliers(times: number[]): number[] {
    const minimumSampleCount = 10;
    const fenceMultiplier = 3; // wider than the common 1.5x fence, so fewer samples are discarded

    if (times.length >= minimumSampleCount) {
        const ascending = Array.from(times).sort((x, y) => x - y);
        const valueAtFraction = (fraction: number): number =>
            ascending[Math.floor(ascending.length * fraction)];

        const firstQuartile = valueAtFraction(0.25);
        const thirdQuartile = valueAtFraction(0.75);
        const fenceWidth = fenceMultiplier * (thirdQuartile - firstQuartile);

        const lowestAccepted = firstQuartile - fenceWidth;
        const highestAccepted = thirdQuartile + fenceWidth;

        return times.filter(sample => sample >= lowestAccepted && sample <= highestAccepted);
    }

    // Too few samples for the quartiles to be meaningful: hand the input back untouched.
    return times;
}
304+
/**
 * Computes the sample standard deviation of a set of timing measurements.
 *
 * The timings are a sample of the underlying run-time distribution, so the
 * variance is divided by (n - 1) (Bessel's correction) rather than n. This
 * gives an unbiased variance estimate for the standard error and confidence
 * interval derived from it.
 *
 * @param times - Timing samples.
 * @returns The sample standard deviation, or 0 when fewer than two samples
 *          are supplied (instead of NaN from a 0/0 division).
 */
private calculateStandardDeviation(times: number[]): number {
    const count = times.length;

    // With zero or one sample there is no measurable spread; returning 0 keeps
    // NaN out of downstream arithmetic and formatted output.
    if (count < 2) {
        return 0;
    }

    const mean = times.reduce((sum, value) => sum + value, 0) / count;
    const sumOfSquaredDeviations = times.reduce(
        (sum, value) => sum + (value - mean) ** 2,
        0
    );

    return Math.sqrt(sumOfSquaredDeviations / (count - 1));
}
311+
/**
 * Builds a 95% confidence interval for the mean of the given timings.
 *
 * The interval is mean ± 1.96 * (stdDev / sqrt(n)), where stdDev comes from
 * calculateStandardDeviation. 1.96 is the two-sided 95% critical value of the
 * standard normal distribution; it is applied regardless of sample size, so
 * the interval is only a good approximation for large samples.
 *
 * @param times - Timing samples.
 * @returns The lower and upper bounds of the interval.
 */
private calculateConfidenceInterval(times: number[]): { lower: number; upper: number } {
    const criticalValue95 = 1.96;
    const sampleCount = times.length;

    let total = 0;
    for (const sample of times) {
        total += sample;
    }
    const center = total / sampleCount;

    const spread = this.calculateStandardDeviation(times);
    const errorOfMean = spread / Math.sqrt(sampleCount);
    const halfWidth = criticalValue95 * errorOfMean;

    return { lower: center - halfWidth, upper: center + halfWidth };
}
325+
/**
 * Computes the coefficient of variation (standard deviation relative to the
 * mean) of the given timings, expressed as a percentage.
 *
 * @param times - Timing samples.
 * @returns (stdDev / mean) * 100, with stdDev taken from
 *          calculateStandardDeviation. No guard is applied for an empty
 *          array or a zero mean.
 */
private calculateCoefficientOfVariation(times: number[]): number {
    let total = 0;
    for (const sample of times) {
        total += sample;
    }
    const center = total / times.length;
    const relativeSpread = this.calculateStandardDeviation(times) / center;

    // Scale the ratio to a percentage.
    return relativeSpread * 100;
}
331+
/**
 * Maps a coefficient of variation (in percent) to a human-readable rating.
 *
 * Thresholds are inclusive upper bounds: <= 5 is Excellent, <= 10 is Good,
 * <= 20 is Fair; anything else (including NaN) is Poor.
 *
 * @param coefficientOfVariation - Coefficient of variation as a percentage.
 * @returns The rating label, prefixed with a coloured indicator.
 */
private getStabilityRating(coefficientOfVariation: number): string {
    // Ordered from strictest to loosest; the first matching ceiling wins.
    const ratingsByCeiling: Array<[number, string]> = [
        [5, '🟢 Excellent'],
        [10, '🟡 Good'],
        [20, '🟠 Fair'],
    ];

    for (const [ceiling, label] of ratingsByCeiling) {
        if (coefficientOfVariation <= ceiling) {
            return label;
        }
    }

    return '🔴 Poor';
}
338+
242339 async runFullBenchmark ( ) : Promise < void > {
243340 console . log ( '🚀 Starting Performance Benchmark Suite' ) ;
244341 console . log ( `\nUsing ${ this . testFiles . length } real test files from system tests\n` ) ;
@@ -264,33 +361,41 @@ class PerformanceBenchmark {
264361
265362 // Factory creation results
266363 const factory = this . results . factoryCreation ;
364+ const factoryCv = this . calculateCoefficientOfVariation ( factory . times ) ;
267365 console . log ( `\n🎯 Factory Creation:` ) ;
268366 console . log ( ` Iterations: ${ factory . iterations } ` ) ;
269- console . log ( ` Average: ${ factory . average . toFixed ( 3 ) } ms` ) ;
270- console . log ( ` Median: ${ factory . median . toFixed ( 3 ) } ms` ) ;
271- console . log ( ` Min: ${ factory . min . toFixed ( 3 ) } ms` ) ;
272- console . log ( ` Max: ${ factory . max . toFixed ( 3 ) } ms` ) ;
367+ console . log ( ` Average: ${ factory . average . toFixed ( 6 ) } ms ± ${ factory . standardDeviation . toFixed ( 6 ) } ms` ) ;
368+ console . log ( ` Median: ${ factory . median . toFixed ( 6 ) } ms` ) ;
369+ console . log ( ` 95% CI: [${ factory . confidenceInterval95 . lower . toFixed ( 6 ) } , ${ factory . confidenceInterval95 . upper . toFixed ( 6 ) } ]ms` ) ;
370+ console . log ( ` Range: [${ factory . min . toFixed ( 6 ) } , ${ factory . max . toFixed ( 6 ) } ]ms` ) ;
371+ console . log ( ` Stability: ${ factoryCv . toFixed ( 1 ) } % CV ${ this . getStabilityRating ( factoryCv ) } ` ) ;
273372 console . log ( ` Total Duration: ${ factory . totalDuration . toFixed ( 3 ) } ms` ) ;
274373
275374 // Document formatting results
276375 for ( const [ size , results ] of Object . entries ( this . results . documentFormatting ) ) {
376+ const cv = this . calculateCoefficientOfVariation ( results . times ) ;
277377 console . log ( `\n🎯 Document Formatting (${ size } ):` ) ;
278378 const fileList = results . files . length <= 3
279379 ? results . files . join ( ', ' )
280380 : `${ results . files . slice ( 0 , 3 ) . join ( ', ' ) } ...` ;
281381 console . log ( ` Test files: ${ results . files . length } files (${ fileList } )` ) ;
282382 console . log ( ` Iterations: ${ results . iterations } ` ) ;
283- console . log ( ` Average: ${ results . average . toFixed ( 3 ) } ms` ) ;
383+ console . log ( ` Average: ${ results . average . toFixed ( 3 ) } ms ± ${ results . standardDeviation . toFixed ( 3 ) } ms ` ) ;
284384 console . log ( ` Median: ${ results . median . toFixed ( 3 ) } ms` ) ;
285- console . log ( ` Min: ${ results . min . toFixed ( 3 ) } ms` ) ;
286- console . log ( ` Max: ${ results . max . toFixed ( 3 ) } ms` ) ;
385+ console . log ( ` 95% CI: [${ results . confidenceInterval95 . lower . toFixed ( 3 ) } , ${ results . confidenceInterval95 . upper . toFixed ( 3 ) } ]ms` ) ;
386+ console . log ( ` Range: [${ results . min . toFixed ( 3 ) } , ${ results . max . toFixed ( 3 ) } ]ms` ) ;
387+ console . log ( ` Stability: ${ cv . toFixed ( 1 ) } % CV ${ this . getStabilityRating ( cv ) } ` ) ;
287388 console . log ( ` Total Duration: ${ results . totalDuration . toFixed ( 3 ) } ms` ) ;
288389 }
289390
290391 console . log ( '\n' + '=' . repeat ( 100 ) ) ;
291392 console . log ( `⏱️ OVERALL BENCHMARK DURATION: ${ this . results . overallDuration . toFixed ( 3 ) } ms` ) ;
292393 console . log ( '=' . repeat ( 100 ) ) ;
293- console . log ( '💡 TIP: Run this benchmark before and after code changes to measure improvements!' ) ;
394+ console . log ( '💡 TIPS FOR CONSISTENT BENCHMARKING:' ) ;
395+ console . log ( ' • Close other applications to reduce system interference' ) ;
396+ console . log ( ' • Run multiple times and compare confidence intervals' ) ;
397+ console . log ( ' • Look for CV (Coefficient of Variation) < 10% for reliable measurements' ) ;
398+ console . log ( ' • Focus on trends across multiple runs rather than absolute values' ) ;
294399 console . log ( '=' . repeat ( 100 ) ) ;
295400 }
296401
0 commit comments