Uploaded by alternatemail00000

evaluation-results-on-37-pairs

advertisement
Evaluation Results
Created time
@October 20, 2023 1:01 PM
Tags
ROUGE
Original note baseline
{
"rouge1": 0.6477628975693039,
"rouge2": 0.5160654113330174,
"rougeL": 0.5851892006765682,
"rougeLsum": 0.645327740129239
}
LLaMA pipeline
{
"rouge1": 0.1924092962563515,
"rouge2": 0.04360140315395221,
"rougeL": 0.1003188617643995,
"rougeLsum": 0.1864718905448754
}
BLEU
Original note baseline
{
"bleu": 0.5780947668139638,
"precisions": [
0.6961884368308351,
0.585238013404365,
0.5403844496164124,
0.5072639225181598
],
Evaluation Results
1
"brevity_penalty": 1.0,
"length_ratio": 1.0023179945054945,
"translation_length": 11675,
"reference_length": 11648
}
LLaMA pipeline
{
"bleu": 0.01460177829334982,
"precisions": [
0.14876548341508022,
0.01919154073519004,
0.006462641761174116,
0.0024637741679542323
],
"brevity_penalty": 1.0,
"length_ratio": 2.065418956043956,
"translation_length": 24058,
"reference_length": 11648
}
BERTScore
Original note baseline
precision
recall
f1
count
37.000000
37.000000
37.000000
mean
std
0.914708
0.032952
0.916119
0.033842
0.915360
0.032718
min
25%
0.833803
0.898605
0.826438
0.895524
0.841611
0.892971
50%
75%
0.915494
0.936310
0.923069
0.937829
0.916785
0.937555
max
0.981664
0.973932
0.977040
LLaMA pipeline
count
precision
37.000000
recall
37.000000
f1
37.000000
mean
std
0.803681
0.021186
0.773806
0.010492
0.788334
0.013453
Evaluation Results
2
min
0.697639
0.748151
0.727139
25%
50%
0.798375
0.806670
0.768432
0.774323
0.784077
0.790165
75%
max
0.814142
0.834506
0.778798
0.808619
0.793273
0.821359
Individual scores:
Original note baseline
{'precision': [0.9484990835189819, 0.9388542771339417, 0.8533807396888733, 0.9417873620986
938, 0.9334406852722168, 0.8623211979866028, 0.9311872720718384, 0.9429115056991577, 0.884
2721581459045, 0.8563705086708069, 0.9524185657501221, 0.9181857109069824, 0.8943687677383
423, 0.8832247257232666, 0.9357787370681763, 0.8833404779434204, 0.8338028192520142, 0.932
2030544281006, 0.8878776431083679, 0.9035142064094543, 0.9452351927757263, 0.9154940843582
153, 0.9076628684997559, 0.9043696522712708, 0.9263895750045776, 0.9075359106063843, 0.901
8535017967224, 0.9363096356391907, 0.9066272974014282, 0.9259231090545654, 0.8986054658889
77, 0.9678440690040588, 0.9816638231277466, 0.9317482709884644, 0.9009265899658203, 0.9066
367149353027, 0.9616420269012451], 'recall': [0.9452258348464966, 0.9378289580345154, 0.89
5524263381958, 0.9507357478141785, 0.9358421564102173, 0.8264379501342773, 0.9333416223526
001, 0.9368026256561279, 0.8807306289672852, 0.859757661819458, 0.9530265927314758, 0.9027
142524719238, 0.8787986040115356, 0.8998439311981201, 0.9393388032913208, 0.87485563755035
4, 0.8495674133300781, 0.9333233833312988, 0.8880221843719482, 0.9126322269439697, 0.94067
1443939209, 0.9166913032531738, 0.9069460034370422, 0.9295451641082764, 0.931199908256530
8, 0.9347776770591736, 0.894629955291748, 0.9383251667022705, 0.9139026403427124, 0.923069
0002441406, 0.9009839296340942, 0.9716170430183411, 0.9724586009979248, 0.924977421760559
1, 0.9086248874664307, 0.8797110319137573, 0.9739315509796143], 'f1': [0.94685959815979,
0.9383413195610046, 0.8739447593688965, 0.9462404251098633, 0.9346398711204529, 0.84399837
2554779, 0.932263195514679, 0.9398471713066101, 0.8824978470802307, 0.8580607175827026, 0.
9527224898338318, 0.9103842377662659, 0.8865153193473816, 0.8914568424224854, 0.9375554323
196411, 0.8790775537490845, 0.8416112661361694, 0.932762861251831, 0.8879498839378357, 0.9
080503582954407, 0.9429477453231812, 0.9160923361778259, 0.9073043465614319, 0.91678464412
68921, 0.9287885427474976, 0.9209554195404053, 0.8982271552085876, 0.9373162984848022, 0.9
102504253387451, 0.9244938492774963, 0.8997931480407715, 0.9697268605232239, 0.97703957557
67822, 0.9283504486083984, 0.904759407043457, 0.8929709792137146, 0.9677477478981018], 'ha
shcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.35.0.dev0)'}
LLaMA pipeline
{'precision': [0.8171337246894836, 0.8060520887374878, 0.7884390354156494, 0.8099859952926
Evaluation Results
3
636, 0.8141422271728516, 0.7826334238052368, 0.8300600051879883, 0.8196421265602112, 0.834
5058560371399, 0.7906543016433716, 0.8179484009742737, 0.801543116569519, 0.80024480819702
15, 0.8105652332305908, 0.8142386674880981, 0.7948695421218872, 0.7862777709960938, 0.8167
192935943604, 0.794829785823822, 0.8094707727432251, 0.8138425350189209, 0.819159686565399
2, 0.8068445324897766, 0.8025804758071899, 0.800545871257782, 0.7966451644897461, 0.806669
8908805847, 0.8026790618896484, 0.6976394057273865, 0.8116635680198669, 0.797974467277526
9, 0.7983754277229309, 0.8126678466796875, 0.8050883412361145, 0.809309720993042, 0.814459
6815109253, 0.8001062273979187], 'recall': [0.77308189868927, 0.7782115340232849, 0.771215
6772613525, 0.7658669352531433, 0.772157609462738, 0.7802263498306274, 0.7684320211410522,
0.7651710510253906, 0.808619499206543, 0.7776088714599609, 0.787682056427002, 0.7662060260
772705, 0.7758738994598389, 0.7935733795166016, 0.7751314640045166, 0.748151421546936, 0.7
69829511642456, 0.781481146812439, 0.7716745138168335, 0.7619912624359131, 0.7793250083923
34, 0.7689720988273621, 0.762574315071106, 0.7743232250213623, 0.7629706859588623, 0.77879
81033325195, 0.7762854695320129, 0.7788230180740356, 0.7592425346374512, 0.775569796562194
8, 0.7764715552330017, 0.785670280456543, 0.7688745260238647, 0.761435866355896, 0.7756378
650665283, 0.7822741866111755, 0.771398663520813], 'f1': [0.7944976687431335, 0.7918872237
205505, 0.7797322869300842, 0.7873088717460632, 0.792594313621521, 0.7814280390739441, 0.7
98058032989502, 0.7914705276489258, 0.8213587999343872, 0.7840773463249207, 0.802529990673
0652, 0.7834763526916504, 0.7878709435462952, 0.8019793629646301, 0.7942039370536804, 0.77
08032131195068, 0.7779667377471924, 0.7987117171287537, 0.7830810546875, 0.785013794898986
8, 0.7962098717689514, 0.7932729125022888, 0.7840849757194519, 0.7881986498832703, 0.78130
67436218262, 0.7876205444335938, 0.7911860346794128, 0.7905711531639099, 0.727138519287109
4, 0.7932063341140747, 0.7870761752128601, 0.7919719219207764, 0.7901648879051208, 0.78265
38681983948, 0.792116105556488, 0.7980425953865051, 0.7854902148246765], 'hashcode': 'robe
rta-large_L17_no-idf_version=0.3.12(hug_trans=4.35.0.dev0)'}
Evaluation Results
4
Download