forked from ZipCPU/wb2axip
-
Notifications
You must be signed in to change notification settings - Fork 0
/
axiperf.v
1385 lines (1312 loc) · 43.4 KB
/
axiperf.v
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
////////////////////////////////////////////////////////////////////////////////
//
// Filename: axiperf
// {{{
// Project: WB2AXIPSP: bus bridges and other odds and ends
//
// Purpose: Measure the performance of a high speed AXI interface. The
// {{{
// following monitor requires connecting to both an AXI-lite slave
// interface, as well as a second AXI interface as a monitor. The AXI
// monitor interface is read only, and (ideally) shouldn't be corrupted by
// the inclusion of the AXI-lite interface on the same bus.
//
// The core works by counting clock cycles in a fashion that should
// supposedly make it easy to calculate 1) throughput, and 2) lag.
// Moreover, the counters are arranged such that after the fact, the
// various contributors to throughput can be measured and evaluated: was
// the slave the holdup? Or was it the master?
//
// To use the core, connect it and build your design. Then write a '3'
// to register 31 (address 124). Once the bus returns to idle, the core
// will begin capturing data and statistics. When done, write a '0' to
// the same register. This will create a stop request. Once the bus
// comes to a stop, the core will stop accumulating values into its
// statistics. Those statistics can then be read out from the AXI-lite
// bus and analyzed.
// }}}
// Goals:
// {{{
// My two biggest goals are to measure throughput and lag. Defining those
// two measures, of course, is half the battle. The other half of the
// battle is knowing which side to blame for any particular issue.
//
// Let's start with the total time required for any transaction. This
// equals the time from the indication of a request to the last response.
// We'll use a linear model to describe this transaction time:
//
// Transaction time = Latency + (Beats in transaction) / Throughput
//
// The goal of this core is to help you identify latency and throughput
// numbers.
//
// One measure might be to take the total number of clock cycles, from when
// the core was enabled to when it was disabled, and to divide by the
// number of beats transmitted.
//
// (Poor) Throughput = (Total beats transferred) / (total time)
//
// In a heavily used bus, this might be a good enough measure. However,
// this is a poor measure for most systems where the bus is idle most of
// the time. Instead, it might be nice to start the measurement early
// on during some task, and conclude it much later. In the meantime, the
// bus might go from idle to busy and back again many times. For example,
// you don't want to copy information from the disk drive if you haven't
// made a request of the controller. For these reasons, we try to achieve
// a better measurement.
//
// Here's the basic approach: we'll look at all of the clocks associated
// with any particular type of transaction, and lump them into a couple
// of categories: latency limiting clocks and throughput limiting clocks.
// We'll then divide the latency limiting clocks by the number of bursts
// that have taken place, and divide the total number of beats by the
// time taken to transmit them.
//
// Latency = (latency measures) / (bursts)
// Throughput = (beats) / (transmission duration, inc. beats)
//
// In general, we'll define the transmission duration as the time from the
// first clock cycle that RVALID (or WVALID) is raised until the final
// cycle when RVALID && RREADY && RLAST (or WVALID && WREADY && WLAST).
// Unless we know otherwise, all clock cycles between these two will
// be marked as a transmission duration clock cycles. The exception
// to this rule, however, is the W* channel where one or two W*
// transactions might take place prior to the first AW* transaction. In
// this case, any idle cycles during this time are marked as a latency
// measure, not a throughput measure of transmission duration.
//
// Latency measures, on the other hand, are anything that appear to be
// burst related--such as the time from the request to the first
// RVALID (or WVALID), or similarly the time from the last WVALID && WLAST
// until the final BVALID && BREADY.
//
// These measures are listed in more detail below.
//
// Certain measures below are marked as *ORTHOGONAL*. These are perhaps
// better known as (independent), but I started calling them orthogonal
// and ... will probably do so for some time. Orthogonal measures are
// those that don't overlap. For example, if you just counted AWVALID
// && AWREADY (bursts) and WVALID && WREADY clock cycles (beats), you might
// get a big overlap between the two and so not know which to count. Not
// so with the orthogonal measures.
//
// Further, at the end of every list of orthogonal measures is a metric
// that can be used to calculate total cycles used--that way you know
// how the measures relate.
// }}}
// Registers
// {{{
// 0: Active time
// Number of clock periods that the performance monitor has been
// accumulating data for.
// 4: Max bursts
// Bits 31:24 -- the maximum number of outstanding write bursts at any
// given time. A write burst begins with either
// AWVALID && AWREADY or WVALID && WREADY and ends with
// BVALID && BREADY. This will be the maximum of the two.
// Bits 23:16 -- the maximum number of outstanding read bursts at any
// given time. A read burst begins with ARVALID &&ARREADY,
// and ends with RVALID && RLAST
// Bits 15: 8 -- the maximum write burst size seen, as captured by AWLEN
// Bits 7: 0 -- the maximum read burst size seen, as captured by ARLEN
// 8: Write idle cycles
// Number of cycles where the write channel is totally idle.
// *ORTHOGONAL*
// 12: AWBurst count
// Number of AWVALID && AWREADY's
// 16: Write beat count
// Number of write beats, WVALID && WREADYs
// 20: AW Byte count
// Number of bytes written, as recorded by the AW* channel (not the
// W* channel and WSTRB signals)
// 24: Write Byte count
// Number of bytes written, as recorded by the W* channel and the
// non zero WSTRB's
// 28: Write slow data
// Number of cycles where a write has started, that is WVALID
// and WREADY (but !WLAST) have been seen, but yet WVALID is now
// low. These are only counted if a write address request has
// already been received--otherwise this would be considered
// a latency measure on the AW* channel.
// *ORTHOGONAL*
// 32: wr_stall--Write stalls
// Counts the number of cycles where WVALID && !WREADY, but
// only if AWVALID is true or has been true. This is to
// distinguish from stalls which may take place before AWVALID,
// where the slave may be waiting on AWVALID (lag) versus
// unable to handle the throughput. (Those are counted under
// wr_early_stall below ...)
// *ORTHOGONAL*
// 36: wr_addr_lag--Write address channel lagging
// Counts the number of cycles where the write data has been
// present on the channel prior to the write address. This
// includes cycles where AWVALID is true or stalled, just not
// cycles where WVALID is also true--since those have already
// been counted.
// *ORTHOGONAL*
// 40: wr_data_lag--Write data lagging
// The AWVALID && AWREADY has been received, but no data has
// yet been received for this write burst and WVALID remains
// low. (i.e., no BVALIDs are pending either.) This is a
// lag measure since WVALID hasn't shown up (yet) to start sending
// data.
// *ORTHOGONAL*
// 44: wr_awr_early--AWVALID && AWREADY, but only if !WVALID and
// no AWVALID has yet been received. This is a lag measure since
// AWVALID is preceding WVALID.
// *ORTHOGONAL*
// 48: wr_early_beat--WVALID && WREADY && !AWVALID, and also prior to
// any AWVALID. This value is double counted in the write
// beat counts, so you will need to subtract the two if you
// wish to separate them.
// *Otherwise ORTHOGONAL*
// 52: wr_addr_stall--AWVALID && !AWREADY, but only if !WVALID and
// no AWVALID has yet been received. (This keeps it from being
// double counted as part of a throughput measure.)
// *ORTHOGONAL*
// 56: wr_early_stall--WVALID && !WREADY, but only if this burst has
// not yet started and no AWVALID has yet been received. That
// makes this a lag measure, since the slave is likely waiting
// for the address before starting to process the burst.
// *ORTHOGONAL*
// 60: b_lag_count
// Counts the number of cycles between the last accepted AWVALID
// and WVALID && WLAST and its corresponding BVALID. This is
// the number of cycles where BVALID could be high in response
// to any burst, but yet where it isn't. To avoid interfering
// with the throughput measure, this excludes any cycles where
// WVALID is also true.
// *ORTHOGONAL*
// 64: b_stall_count
// Number of cycles where BVALID && !BREADY. This could be a
// possible indication of backpressure in the interconnect.
// This also excludes any cycles where WVALID is also true.
// *ORTHOGONAL*
//
// 72: Write Bias
// Total number of cycles between the first AWVALID and the
// first WVALID, minus the total number of cycles between the
// first WVALID and the first AWVALID. This is a measure of
// how often AWV clock cycles come before the first WV cycle and
// by how much. To make use of this statistic, divide it by the
// total number of bursts for the average distance between the
// first AWV and the first WV. Negative distances are possible
// if the first WV tends to precede the first AWV.
// 76: AWR Cycles
// Number of clock cycles between the first AWVALID of any burst
// and the last BVALID && BREADY clearing the channel again.
// This includes any cycles where AWVALID && !AWREADY prior to the
// first burst being accepted.
//
// 80: Write cycles
// Number of clock cycles between the first WVALID of any burst
// and the last BVALID && BREADY clearing the channel again.
// This includes the number of cycles where WVALID && !WREADY,
// even if the channel would be otherwise idle.
//
// Total write cycles = max(AWR Cycles, Write Cycles)
// = (wr_addr_lag+wr_data_lag+wr_awr_early+wr_early_beat
// + wr_addr_stall + wr_b_lag_count + wr_b_stall_count)
// + (wr_slow_data + wr_stall + wr_beats - wr_early_beats)
//
// Latency = (wr_addr_lag + wr_data_lag + wr_awr_early + wr_early_beat
// + wr_addr_stall + wr_b_lag + wr_b_stall) / WR BURSTS
// Throughput= (wr_beats) /
// (wr_slow_data + wr_stall + wr_beats - wr_early_beats)
//
// 84: Read idle cycles
// Number of clock cycles, while the core is collecting, where
// nothing is happening on the read channel--ARVALID is low,
// nothing is outstanding, etc. *ORTHOGONAL*
// 88: Max responding bursts
// This is the maximum number of bursts that have been responding
// at the same time, as counted by the maximum number of ID's
// which have seen an RVALID but not RLAST. It's an estimate of
// how out of order the channel has become.
// 92: Read burst count
// The total number of RVALID && RREADY && RLAST's seen
// 96: Read beat count
// The total number of beats requested, as measured by
// RVALID && RREADY (or equivalently by ARLEN ... but we measure
// RVALID && RREADY here). *ORTHOGONAL*
// 100: Read byte count
// The total number of bytes requested, as measured by ARSIZE
// and ARLEN.
// 104: AR cycles
// Total number of cycles where the interface is idle, but yet
// ARVALID && ARREADY are both true. Yes, it'll be busy on the
// next cycle, but we still need to count them.
// *ORTHOGONAL*
// 108: AR stalls
// Total number of clock cycles where ARVALID && !ARREADY, but
// only under the condition that nothing is currently outstanding.
// If the master refuses to allow a second AR* burst into the
// pipeline, this should show in the maximum number of outstanding
// read bursts ever allowed. *ORTHOGONAL*
// 112: R stalls
// Total number of clock cycles where RVALID && !RREADY. This is
// an indication of a master that has issued more read requests
// than it can process, and so it is suffering from internal
// back pressure. *ORTHOGONAL*
// 116: Lag counter
// Counts the number of clock cycles where an outstanding read
// request exists, but for which no data has (yet) been returned.
// *ORTHOGONAL*
// 120: Slow link
// Counts the number of clock cycles where RVALID is low, but yet
// a burst return has already started but not yet been completed.
// *ORTHOGONAL*
//
// If we've done this right, then
//
// active_time == read idle cycles (channel is idle)
// + read_beat_count (data is transferred)
// + r stalls (Master isn't ready)
// + lag counter (No data is ready)
// + slow link (Slave isn't ready)
// + rd_ar_stalls (Slave not ready for AR*)
// + rd_ar_cycles (Slave accepted AR*, o.w. idle)
//
// We can then measure read throughput as the number of
// active cycles (active time - read idle counts) divided by the
// number of bytes (or beats) transferred (depending upon the
// units you want).
//
// Lag would be measured by the lag counter divided by the number
// of read bursts.
//
// 124: Control register
// Write a 1 to this register to start recording, and a 0 to this
// register to stop. Writing a 2 will clear the counters as
// well.
//
// Performance:
// Write Throughput = (Wr Beats) / (Wr Beats + WrStalls + WrSlow);
// Read Throughput = (Rd Beats) / (Rd Beats + R Stalls + RSlow);
// Read Latency = (AR Stalls + RdLag) ./ (Rd Bursts)
// }}}
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
// }}}
// Copyright (C) 2020-2021, Gisselquist Technology, LLC
// {{{
//
// This file is part of the WB2AXIP project.
//
// The WB2AXIP project contains free software and gateware, licensed under the
// Apache License, Version 2.0 (the "License"). You may not use this project,
// or this file, except in compliance with the License. You may obtain a copy
// of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
//
////////////////////////////////////////////////////////////////////////////////
// }}}
//
`default_nettype none
//
module axiperf #(
// {{{
//
// Size of the AXI-lite bus. These are fixed, since 1) AXI-lite
// is fixed at a width of 32-bits by Xilinx def'n, and 2) since
// we only ever have 32 register words (a 7-bit byte address).
parameter C_AXIL_ADDR_WIDTH = 7,
localparam C_AXIL_DATA_WIDTH = 32,
parameter C_AXI_DATA_WIDTH = 32,
parameter C_AXI_ADDR_WIDTH = 32,
parameter C_AXI_ID_WIDTH = 4,
parameter [0:0] OPT_LOWPOWER = 0,
parameter LGCNT = 32
// }}}
) (
// {{{
input wire S_AXI_ACLK,
input wire S_AXI_ARESETN,
//
input wire S_AXIL_AWVALID,
output wire S_AXIL_AWREADY,
input wire [C_AXIL_ADDR_WIDTH-1:0] S_AXIL_AWADDR,
input wire [2:0] S_AXIL_AWPROT,
//
input wire S_AXIL_WVALID,
output wire S_AXIL_WREADY,
input wire [C_AXIL_DATA_WIDTH-1:0] S_AXIL_WDATA,
input wire [C_AXIL_DATA_WIDTH/8-1:0] S_AXIL_WSTRB,
//
output wire S_AXIL_BVALID,
input wire S_AXIL_BREADY,
output wire [1:0] S_AXIL_BRESP,
//
input wire S_AXIL_ARVALID,
output wire S_AXIL_ARREADY,
input wire [C_AXIL_ADDR_WIDTH-1:0] S_AXIL_ARADDR,
input wire [2:0] S_AXIL_ARPROT,
//
output wire S_AXIL_RVALID,
input wire S_AXIL_RREADY,
output wire [C_AXIL_DATA_WIDTH-1:0] S_AXIL_RDATA,
output wire [1:0] S_AXIL_RRESP,
//
//
// The AXI Monitor interface
//
input wire M_AXI_AWVALID,
input wire M_AXI_AWREADY,
input wire [C_AXI_ID_WIDTH-1:0] M_AXI_AWID,
input wire [C_AXI_ADDR_WIDTH-1:0] M_AXI_AWADDR,
input wire [7:0] M_AXI_AWLEN,
input wire [2:0] M_AXI_AWSIZE,
input wire [1:0] M_AXI_AWBURST,
input wire M_AXI_AWLOCK,
input wire [3:0] M_AXI_AWCACHE,
input wire [2:0] M_AXI_AWPROT,
input wire [3:0] M_AXI_AWQOS,
//
//
input wire M_AXI_WVALID,
input wire M_AXI_WREADY,
input wire [C_AXI_DATA_WIDTH-1:0] M_AXI_WDATA,
input wire [C_AXI_DATA_WIDTH/8-1:0] M_AXI_WSTRB,
input wire M_AXI_WLAST,
//
//
input wire M_AXI_BVALID,
input wire M_AXI_BREADY,
input wire [C_AXI_ID_WIDTH-1:0] M_AXI_BID,
input wire [1:0] M_AXI_BRESP,
//
//
input wire M_AXI_ARVALID,
input wire M_AXI_ARREADY,
input wire [C_AXI_ID_WIDTH-1:0] M_AXI_ARID,
input wire [C_AXI_ADDR_WIDTH-1:0] M_AXI_ARADDR,
input wire [7:0] M_AXI_ARLEN,
input wire [2:0] M_AXI_ARSIZE,
input wire [1:0] M_AXI_ARBURST,
input wire M_AXI_ARLOCK,
input wire [3:0] M_AXI_ARCACHE,
input wire [2:0] M_AXI_ARPROT,
input wire [3:0] M_AXI_ARQOS,
//
input wire M_AXI_RVALID,
input wire M_AXI_RREADY,
input wire [C_AXI_ID_WIDTH-1:0] M_AXI_RID,
input wire [C_AXI_DATA_WIDTH-1:0] M_AXI_RDATA,
input wire M_AXI_RLAST,
input wire [1:0] M_AXI_RRESP
//
// }}}
);
////////////////////////////////////////////////////////////////////////
//
// Register/wire signal declarations
// {{{
////////////////////////////////////////////////////////////////////////
//
//
localparam ADDRLSB = $clog2(C_AXIL_DATA_WIDTH/8);
wire i_reset = !S_AXI_ARESETN;
// AXI signaling
// {{{
wire axil_write_ready;
wire [C_AXIL_ADDR_WIDTH-ADDRLSB-1:0] awskd_addr;
//
wire [C_AXIL_DATA_WIDTH-1:0] wskd_data;
wire [C_AXIL_DATA_WIDTH/8-1:0] wskd_strb;
reg axil_bvalid;
//
wire axil_read_ready;
wire [C_AXIL_ADDR_WIDTH-ADDRLSB-1:0] arskd_addr;
reg [C_AXIL_DATA_WIDTH-1:0] axil_read_data;
reg axil_read_valid;
wire awskd_valid, wskd_valid;
wire arskd_valid;
// }}}
reg idle_bus, triggered, stop_request,
clear_request, start_request;
reg [LGCNT-1:0] active_time;
reg [7:0] wr_max_burst_size;
reg [LGCNT-1:0] wr_awburst_count, wr_wburst_count, wr_beat_count;
reg [LGCNT-1:0] wr_aw_byte_count, wr_w_byte_count;
reg [7:0] wr_aw_outstanding, wr_w_outstanding,
wr_aw_max_outstanding, wr_w_max_outstanding,
wr_max_outstanding, wr_now_outstanding;
reg wr_aw_zero_outstanding, wr_w_zero_outstanding,
wr_in_progress;
reg [LGCNT-1:0] wr_idle_cycles,
wr_b_lag_count, wr_b_stall_count,
wr_slow_data, wr_stall, wr_early_beat, // wr_beat,
wr_addr_lag, wr_data_lag, wr_awr_early,
wr_bias, wr_addr_stall, wr_early_stall;
reg[C_AXI_DATA_WIDTH/8:0] wstrb_count;
reg [LGCNT-1:0] rd_idle_cycles, rd_lag_counter, rd_slow_link,
rd_burst_count, rd_byte_count, rd_beat_count,
rd_ar_stalls, rd_r_stalls, rd_ar_cycles;
reg [7:0] rd_outstanding_bursts, rd_max_burst_size,
rd_max_outstanding_bursts;
reg [7:0] rd_outstanding_bursts_id [0:(1<<C_AXI_ID_WIDTH)-1];
reg [(1<<C_AXI_ID_WIDTH)-1:0] rd_nonzero_outstanding_id,
rd_bursts_in_flight;
reg [C_AXI_ID_WIDTH:0] rd_total_in_flight, rd_responding,
rd_max_responding_bursts;
reg rd_responding_d;
reg [LGCNT-1:0] wr_cycles, awr_cycles;
reg last_awr_stall, last_wr_stall;
integer ik;
genvar gk;
// }}}
////////////////////////////////////////////////////////////////////////
//
// AXI-lite signaling
// {{{
////////////////////////////////////////////////////////////////////////
//
//
//
// Write signaling
//
// {{{
// Skid buffer on the AXI-lite write address channel.  Only the word
// address (the address bits from ADDRLSB upward) is passed through.  The
// buffer's downstream ready input is driven by axil_write_ready.
skidbuffer #(
		.OPT_OUTREG(0),
		.OPT_LOWPOWER(OPT_LOWPOWER),
		.DW(C_AXIL_ADDR_WIDTH-ADDRLSB)
	) axilawskid (
		.i_clk(S_AXI_ACLK),
		.i_reset(i_reset),
		// Bus facing side
		.i_valid(S_AXIL_AWVALID),
		.o_ready(S_AXIL_AWREADY),
		.i_data(S_AXIL_AWADDR[C_AXIL_ADDR_WIDTH-1:ADDRLSB]),
		// Register logic facing side
		.o_valid(awskd_valid),
		.i_ready(axil_write_ready),
		.o_data(awskd_addr)
	);
// Skid buffer on the AXI-lite write data channel.  The data word and its
// byte strobes travel together as one concatenated vector, and are split
// apart again into wskd_data and wskd_strb on the way out.
skidbuffer #(
		.OPT_OUTREG(0),
		.OPT_LOWPOWER(OPT_LOWPOWER),
		.DW(C_AXIL_DATA_WIDTH+C_AXIL_DATA_WIDTH/8)
	) axilwskid (
		.i_clk(S_AXI_ACLK),
		.i_reset(i_reset),
		// Bus facing side
		.i_valid(S_AXIL_WVALID),
		.o_ready(S_AXIL_WREADY),
		.i_data({ S_AXIL_WDATA, S_AXIL_WSTRB }),
		// Register logic facing side
		.o_valid(wskd_valid),
		.i_ready(axil_write_ready),
		.o_data({ wskd_data, wskd_strb })
	);
// axil_write_ready: a register write takes place on any cycle where both
// skid buffers hold a value and the B channel is not stalled on an
// earlier response.
assign	axil_write_ready = awskd_valid && wskd_valid
			&& (!S_AXIL_BVALID || S_AXIL_BREADY);

// axil_bvalid: raised by each accepted write, and dropped once BREADY is
// seen on a cycle where no further write is accepted.
initial	axil_bvalid = 0;
always @(posedge S_AXI_ACLK)
begin
	if (i_reset)
		axil_bvalid <= 0;
	else if (axil_write_ready)
		axil_bvalid <= 1;
	else if (S_AXIL_BREADY)
		axil_bvalid <= 0;
end

assign	S_AXIL_BVALID = axil_bvalid;
// The write response code is tied to 2'b00
assign	S_AXIL_BRESP = 2'b00;
// }}}
//
// Read signaling
//
// {{{
// Skid buffer on the AXI-lite read address channel.  As with the write
// address, only the word address bits are carried.  The buffer's
// downstream ready input is driven by axil_read_ready.
skidbuffer #(
		.OPT_OUTREG(0),
		.OPT_LOWPOWER(OPT_LOWPOWER),
		.DW(C_AXIL_ADDR_WIDTH-ADDRLSB)
	) axilarskid (
		.i_clk(S_AXI_ACLK),
		.i_reset(i_reset),
		// Bus facing side
		.i_valid(S_AXIL_ARVALID),
		.o_ready(S_AXIL_ARREADY),
		.i_data(S_AXIL_ARADDR[C_AXIL_ADDR_WIDTH-1:ADDRLSB]),
		// Register logic facing side
		.o_valid(arskd_valid),
		.i_ready(axil_read_ready),
		.o_data(arskd_addr)
	);
// axil_read_ready: a register read is accepted whenever an address is
// waiting in the skid buffer and the R channel is free, or is being
// freed on this cycle.
assign	axil_read_ready = arskd_valid
			&& (!axil_read_valid || S_AXIL_RREADY);

// axil_read_valid: raised by each accepted read, and dropped once RREADY
// is seen on a cycle where no further read is accepted.
initial	axil_read_valid = 1'b0;
always @(posedge S_AXI_ACLK)
begin
	if (i_reset)
		axil_read_valid <= 1'b0;
	else if (axil_read_ready)
		axil_read_valid <= 1'b1;
	else if (S_AXIL_RREADY)
		axil_read_valid <= 1'b0;
end

assign	S_AXIL_RVALID = axil_read_valid;
assign	S_AXIL_RDATA  = axil_read_data;
// The read response code is tied to 2'b00
assign	S_AXIL_RRESP  = 2'b00;
// }}}
// }}}
////////////////////////////////////////////////////////////////////////
//
// AXI-lite register logic
// {{{
////////////////////////////////////////////////////////////////////////
//
//
// Control register write logic: generates clear_request, stop_request and
// start_request from AXI-lite writes to word address 5'h1f.
//
// All assignments are non-blocking and within one always block, so where
// several sections below assign the same register on the same clock, the
// assignment appearing last in the source is the one that takes effect.
// That gives the priority: reset, then a bus write, then the automatic
// clearing of the requests.
always @(posedge S_AXI_ACLK)
begin
// clear_request defaults low, so a clear written below lasts one cycle
clear_request <= 1'b0;
// Drop any start/stop request once the bus is seen idle, but not on
// the cycle where a clear request is active
if (!clear_request && idle_bus)
begin
start_request <= 0;
stop_request <= 0;
end
if (axil_write_ready)
begin
case(awskd_addr)
// Only the low byte of the control word is examined, and only if
// its write strobe is set
5'h1f: if (wskd_strb[0]) begin
// Start, stop, clear, reset
//
// Bit 0 set   : request a start (unless a stop is still pending)
// Bit 0 clear : request a stop
// Bit 1 set together with bit 0 clear : also clear the counters
clear_request <= wskd_data[1] && !wskd_data[0];
stop_request <= !wskd_data[0];
start_request <= wskd_data[0] && (!stop_request);
end
// Writes to every other address are accepted and ignored
default: begin end
endcase
end
// Reset comes last so that it overrides everything above
if (!S_AXI_ARESETN)
begin
clear_request <= 1'b0;
stop_request <= 1'b0;
start_request <= 1'b0;
end
end
// axil_read_data: AXI-lite read multiplexer.
//
// Whenever the R channel is free (or being freed), the register selected
// by the word address arskd_addr is loaded into axil_read_data.  The
// word is first zeroed, so that counters narrower than the bus, and any
// address without a case entry, read back with zeros in the unassigned
// bits.  With ADDRLSB bits of byte offset dropped, word address N
// corresponds to byte address N * (C_AXIL_DATA_WIDTH/8).
initial axil_read_data = 0;
always @(posedge S_AXI_ACLK)
// The reset clause is only present when OPT_LOWPOWER is set
if (OPT_LOWPOWER && !S_AXI_ARESETN)
axil_read_data <= 0;
else if (!S_AXIL_RVALID || S_AXIL_RREADY)
begin
// Default: any bits not assigned by the case below read as zero
axil_read_data <= 0;
case(arskd_addr)
5'h00: axil_read_data[LGCNT-1:0] <= active_time;
// Four 8-bit maxima packed into one word, most significant first
5'h01: axil_read_data <= { wr_max_outstanding,
rd_max_outstanding_bursts,
wr_max_burst_size,
rd_max_burst_size };
// Write channel counters
5'h02: axil_read_data[LGCNT-1:0] <= wr_idle_cycles;
5'h03: axil_read_data[LGCNT-1:0] <= wr_awburst_count;
5'h04: axil_read_data[LGCNT-1:0] <= wr_beat_count;
5'h05: axil_read_data[LGCNT-1:0] <= wr_aw_byte_count;
5'h06: axil_read_data[LGCNT-1:0] <= wr_w_byte_count;
//
5'h07: axil_read_data[LGCNT-1:0] <= wr_slow_data;
5'h08: axil_read_data[LGCNT-1:0] <= wr_stall;
5'h09: axil_read_data[LGCNT-1:0] <= wr_addr_lag;
5'h0a: axil_read_data[LGCNT-1:0] <= wr_data_lag;
5'h0b: axil_read_data[LGCNT-1:0] <= wr_awr_early;
5'h0c: axil_read_data[LGCNT-1:0] <= wr_early_beat;
5'h0d: axil_read_data[LGCNT-1:0] <= wr_addr_stall;
5'h0e: axil_read_data[LGCNT-1:0] <= wr_early_stall;
5'h0f: axil_read_data[LGCNT-1:0] <= wr_b_lag_count;
5'h10: axil_read_data[LGCNT-1:0] <= wr_b_stall_count;
// 5'h10:
//
// Word address 5'h11 has no entry, and so reads back as zero
5'h12: axil_read_data[LGCNT-1:0] <= wr_bias;
5'h13: axil_read_data[LGCNT-1:0] <= awr_cycles;
5'h14: axil_read_data[LGCNT-1:0] <= wr_cycles;
//
// Read channel counters
5'h15: axil_read_data[LGCNT-1:0] <= rd_idle_cycles;
// rd_max_responding_bursts is C_AXI_ID_WIDTH+1 bits wide, and is zero
// extended up to the full bus width here
5'h16: axil_read_data <= {
{(C_AXIL_DATA_WIDTH-C_AXI_ID_WIDTH-1){1'b0}},
rd_max_responding_bursts };
5'h17: axil_read_data[LGCNT-1:0] <= rd_burst_count;
5'h18: axil_read_data[LGCNT-1:0] <= rd_beat_count;
5'h19: axil_read_data[LGCNT-1:0] <= rd_byte_count;
5'h1a: axil_read_data[LGCNT-1:0] <= rd_ar_cycles;
5'h1b: axil_read_data[LGCNT-1:0] <= rd_ar_stalls;
5'h1c: axil_read_data[LGCNT-1:0] <= rd_r_stalls;
5'h1d: axil_read_data[LGCNT-1:0] <= rd_lag_counter;
5'h1e: axil_read_data[LGCNT-1:0] <= rd_slow_link;
// Control/status word: bit 2 = triggered, bit 1 = clear_request,
// bit 0 = start_request, all remaining bits zero
5'h1f: axil_read_data <= {
// pending_idle,
// pending_first_burst,
// cleared,
28'h0, 1'b0,
triggered,
clear_request,
start_request
};
default: begin end
endcase
// In low power mode, hold the data bus at zero unless a read is
// actually being accepted on this cycle.  Being last, this overrides
// the case statement above.
if (OPT_LOWPOWER && !axil_read_ready)
axil_read_data <= 0;
end
// apply_wstrb: byte-wise merge of two bus words.
//
// For each byte lane, the result takes that byte from new_data when the
// lane's wstrb bit is set, and from prior_data otherwise.
function [C_AXI_DATA_WIDTH-1:0]	apply_wstrb;
	input	[C_AXI_DATA_WIDTH-1:0]		prior_data;
	input	[C_AXI_DATA_WIDTH-1:0]		new_data;
	input	[C_AXI_DATA_WIDTH/8-1:0]	wstrb;

	integer	lane;
begin
	for(lane=0; lane<C_AXI_DATA_WIDTH/8; lane=lane+1)
		apply_wstrb[lane*8 +: 8] = wstrb[lane]
				? new_data[lane*8 +: 8]
				: prior_data[lane*8 +: 8];
end
endfunction
// }}}
////////////////////////////////////////////////////////////////////////
//
// AXI performance counters
// {{{
////////////////////////////////////////////////////////////////////////
//
//
// triggered
// {{{
// triggered: true while the counters are accumulating.  It is cleared by
// reset or a clear request, and otherwise only changes on cycles where
// idle_bus is set.
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN || clear_request)
		triggered <= 1'b0;
	else if (start_request)
	begin
		// A start request holds off until the bus is idle.  While
		// it is pending, the stop branch below is never reached.
		if (idle_bus)
			triggered <= 1'b1;
	end else if (stop_request && idle_bus)
		triggered <= 1'b0;
end
// }}}
// active_time : count number of cycles while triggered
// {{{
// active_time: the number of clock cycles spent with triggered set since
// the last reset or clear request
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN || clear_request)
		active_time <= 0;
	else if (triggered)
		active_time <= active_time + 1;
end
// }}}
// idle_bus : Can we start or stop our counters? Can't if not idle
// {{{
// idle_bus: set when no request is being presented to the monitored bus
// and every outstanding burst counter is either zero, or is one and its
// final response is being accepted on this very cycle.  If neither the
// busy nor the idle condition holds, the flag keeps its prior value.
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN)
		idle_bus <= 1'b1;
	else if (M_AXI_AWVALID || M_AXI_WVALID || M_AXI_ARVALID)
		// Something new is being requested
		idle_bus <= 1'b0;
	else if (
		// Write address bursts: none left after this cycle
		(wr_aw_outstanding
			== ((M_AXI_BVALID && M_AXI_BREADY) ? 1:0))
		// Write data bursts: none left after this cycle
		&& (wr_w_outstanding
			== ((M_AXI_BVALID && M_AXI_BREADY) ? 1:0))
		// Read bursts: none left after this cycle
		&& (rd_outstanding_bursts
			== ((M_AXI_RVALID && M_AXI_RREADY && M_AXI_RLAST)
				? 1:0)))
		idle_bus <= 1'b1;
end
// }}}
////////////////////////////////////////////////////////////////////////
//
// Write statistics
// {{{
////////////////////////////////////////////////////////////////////////
//
//
// wr_max_burst_size: max of all AWLEN values
// {{{
// wr_max_burst_size: the largest AWLEN value presented with AWVALID
// while triggered
initial	wr_max_burst_size = 0;
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN || clear_request)
		wr_max_burst_size <= 0;
	else if (triggered && M_AXI_AWVALID
			&& (M_AXI_AWLEN > wr_max_burst_size))
		wr_max_burst_size <= M_AXI_AWLEN;
end
// }}}
// wr_awburst_count -- count AWVALID && AWREADY
// {{{
// One count for every write address accepted while triggered
initial	wr_awburst_count = 0;
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN || clear_request)
		wr_awburst_count <= 0;
	else if (triggered && M_AXI_AWVALID && M_AXI_AWREADY)
		wr_awburst_count <= wr_awburst_count + 1;
end
// }}}
// wr_wburst_count -- count of WVALID && WLAST && WREADY
// {{{
// One count for every accepted write beat carrying WLAST while triggered
initial	wr_wburst_count = 0;
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN || clear_request)
		wr_wburst_count <= 0;
	else if (triggered && M_AXI_WVALID && M_AXI_WREADY
			&& M_AXI_WLAST)
		wr_wburst_count <= wr_wburst_count + 1;
end
// }}}
// wr_beat_count -- count of WVALID && WREADY
// {{{
// One count for every accepted write beat while triggered
initial	wr_beat_count = 0;
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN || clear_request)
		wr_beat_count <= 0;
	else if (triggered && M_AXI_WVALID && M_AXI_WREADY)
		wr_beat_count <= wr_beat_count + 1;
end
// }}}
// wstrb_count -- combinatorial, current active strobe count
// {{{
// Population count of M_AXI_WSTRB: how many byte lanes are enabled in
// the write beat currently on the bus
always @(*)
begin
	wstrb_count = 0;
	for(ik=0; ik<C_AXI_DATA_WIDTH/8; ik=ik+1)
	begin
		if (M_AXI_WSTRB[ik])
			wstrb_count = wstrb_count + 1;
	end
end
// }}}
// wr_aw_byte_count : count of (AWLEN+1)<<AWSIZE
// {{{
// Bytes requested, as seen on the AW channel.  Each accepted write
// address adds (AWLEN + 1) << AWSIZE, with the sum formed in 32 bits.
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN || clear_request)
		wr_aw_byte_count <= 0;
	else if (triggered && M_AXI_AWVALID && M_AXI_AWREADY)
		wr_aw_byte_count <= wr_aw_byte_count
			+ (({ 24'b0, M_AXI_AWLEN}+32'h1) << M_AXI_AWSIZE);
end
// }}}
// wr_w_byte_count : Count of active WSTRBs
// {{{
// Bytes written, as seen on the W channel.  Each accepted write beat
// adds the number of strobe bits set in that beat.
//
// wstrb_count is zero extended to 32 bits before the addition.
// NOTE(review): the padding is sized against a literal 32 rather than
// LGCNT, and its replication count is no longer positive once
// C_AXI_DATA_WIDTH/8 reaches 31 -- confirm before using wide data buses.
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN || clear_request)
		wr_w_byte_count <= 0;
	else if (triggered && M_AXI_WVALID && M_AXI_WREADY)
		wr_w_byte_count <= wr_w_byte_count
			+ { {(32-C_AXI_DATA_WIDTH/8-1){1'b0}}, wstrb_count };
end
// }}}
// wr_aw_outstanding, wr_aw_zero_outstanding: AWV && AWR - BV && BR
// {{{
// wr_aw_outstanding: accepted write addresses still waiting on their
// write response.  wr_aw_zero_outstanding is a one bit flag maintained
// alongside, set when the count is (about to be) zero.  Neither depends
// on triggered, so they follow the bus at all times.
initial	wr_aw_outstanding = 0;
initial	wr_aw_zero_outstanding = 1;
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN)
	begin
		wr_aw_outstanding      <= 0;
		wr_aw_zero_outstanding <= 1;
	end else if ((M_AXI_AWVALID && M_AXI_AWREADY)
			&& !(M_AXI_BVALID && M_AXI_BREADY))
	begin
		// A new address arrives, with no response leaving
		wr_aw_outstanding      <= wr_aw_outstanding + 1;
		wr_aw_zero_outstanding <= 0;
	end else if (!(M_AXI_AWVALID && M_AXI_AWREADY)
			&& (M_AXI_BVALID && M_AXI_BREADY))
	begin
		// A response leaves, with no new address arriving
		wr_aw_outstanding      <= wr_aw_outstanding - 1;
		wr_aw_zero_outstanding <= (wr_aw_outstanding <= 1);
	end
	// Both at once, or neither: the count is unchanged
end
// }}}
// wr_aw_max_outstanding : max of wr_aw_outstanding
// {{{
// Running maximum of wr_aw_outstanding, sampled while triggered
initial	wr_aw_max_outstanding = 0;
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN || clear_request)
		wr_aw_max_outstanding <= 0;
	else if (triggered
			&& (wr_aw_max_outstanding < wr_aw_outstanding))
		wr_aw_max_outstanding <= wr_aw_outstanding;
end
// }}}
// wr_w_outstanding, wr_w_zero_outstanding: WV & WR & WL - BV & BR
// {{{
// wr_w_outstanding: completed write data bursts (those whose WLAST beat
// has been accepted) still waiting on their write response.
// wr_w_zero_outstanding is a one bit flag maintained alongside, set when
// the count is (about to be) zero.  Neither depends on triggered.
initial	wr_w_outstanding = 0;
initial	wr_w_zero_outstanding = 1;
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN)
	begin
		wr_w_outstanding      <= 0;
		wr_w_zero_outstanding <= 1;
	end else if ((M_AXI_WVALID && M_AXI_WREADY && M_AXI_WLAST)
			&& !(M_AXI_BVALID && M_AXI_BREADY))
	begin
		// A burst's data completes, with no response leaving
		wr_w_outstanding      <= wr_w_outstanding + 1;
		wr_w_zero_outstanding <= 0;
	end else if (!(M_AXI_WVALID && M_AXI_WREADY && M_AXI_WLAST)
			&& (M_AXI_BVALID && M_AXI_BREADY))
	begin
		// A response leaves, with no burst completing
		wr_w_outstanding      <= wr_w_outstanding - 1;
		wr_w_zero_outstanding <= (wr_w_outstanding <= 1);
	end
	// Both at once, or neither: the count is unchanged
end
// }}}
// wr_w_max_outstanding: max of wr_w_outstanding + wr_in_progress
// {{{
// Running maximum of the number of write-data bursts in flight: those
// whose last beat has been accepted (wr_w_outstanding) plus one more if
// a burst is currently part way through (wr_in_progress).  Sampled only
// while triggered; zeroed by a reset or clear request.
//
// The candidate is compared against this register's own value, so that
// the register is a true (monotonic) maximum.  Comparing against the
// combined wr_max_outstanding instead would let this value be replaced
// by a smaller one while wr_max_outstanding was still catching up, and
// would never record the W-channel maximum at all whenever the AW
// channel's maximum was the larger of the two.
initial	wr_w_max_outstanding = 0;
always @(posedge S_AXI_ACLK)
if (!S_AXI_ARESETN || clear_request)
	wr_w_max_outstanding <= 0;
else if (triggered)
begin
	if (wr_w_outstanding + (wr_in_progress ? 1:0)
				> wr_w_max_outstanding)
		wr_w_max_outstanding <= wr_w_outstanding
				+ (wr_in_progress ? 1:0);
end
// }}}
// wr_now_outs*, wr_max_outs*: max of wr_w_outs* and wr_aw_outs*
// {{{
// wr_now_outstanding (combinatorial) is the larger of the two per-channel
// maxima, wr_w_max_outstanding and wr_aw_max_outstanding.
always @(*)
begin
	// Start from the W-channel value, then let the AW-channel value
	// override it if larger
	wr_now_outstanding = wr_w_max_outstanding;
	if (wr_aw_max_outstanding > wr_now_outstanding)
		wr_now_outstanding = wr_aw_max_outstanding;
end

// wr_max_outstanding (registered) is the running maximum of
// wr_now_outstanding, updated only while triggered and zeroed by a reset
// or clear request.
initial	wr_max_outstanding = 0;
always @(posedge S_AXI_ACLK)
begin
	if (clear_request || !S_AXI_ARESETN)
		wr_max_outstanding <= 0;
	else if (triggered && (wr_now_outstanding > wr_max_outstanding))
		wr_max_outstanding <= wr_now_outstanding;
end
// }}}
// wr_in_progress: Flag, true between WVALID and WV && WR && WLAST
// {{{
// Set on any cycle where WVALID is high but the final beat of the burst
// is not being accepted; cleared on the cycle where the final beat
// (WVALID && WREADY && WLAST) is accepted.  Only the bus reset clears
// the flag otherwise.
initial	wr_in_progress = 0;
always @(posedge S_AXI_ACLK)
begin
	if (!S_AXI_ARESETN)
		wr_in_progress <= 0;
	else begin
		// Selector is { final beat accepted, data pending/not final }
		case ({ M_AXI_WVALID && M_AXI_WREADY && M_AXI_WLAST,
			M_AXI_WVALID && (!M_AXI_WREADY || !M_AXI_WLAST) })
		2'b10: wr_in_progress <= 0;
		2'b01: wr_in_progress <= 1;
		// WVALID low: hold the current value
		default: begin end
		endcase
	end
end
// }}}
// Orthogonal write statistics
// {{{
// Here's where we capture our orthogonal measures for the write
// channel. It's important that, for all of these counters, only
// one of them ever counts a given burst. Hence, our criteria are
// binned and orthogonalized below.
//
// AW_O W_O WIP AWV AWR WV WR BV BR
// 0 0 0 0 0 IDLE-CYCLE
// 1 1 0 SLOW-DATA
// 1 1 0 Write stall (#1)
// 0 1 1 0 Write stall (#2)
// 1 1 W-BEAT (Counted elsewhere)
//
// 0 0 0 1 0 W-DATA-LAG (#1)
// 1 0 0 0 W-DATA-LAG (#2)
// 0 1 0 0 WR Early (AWR after WLAST)
// 0 1 0 Write data before AWR
//
//
//
// 0 0 1 1 Early write beat (special)
// 1 1 AW-BURST (Counted elsewhere)
//
// (DRAFT) Single channel AWR orthogonal
// {{{
// AWR bursts (got that)
// AWR Cycles = (AWR latency) + (WR Beats) / (Throughput)
//
// AWR bias = (counts where W follows AW)
// - (counts where AW follows W)
// If (AWR bias > 0), then
// Write lag = (BLAG + AWR bias) / AWR beats
// Else if (AWR bias < 0) (WData before AWVALID), then
// Write lag = (BLAG - AWR bias) / AWR beats
// Write throughput = (WR BEATS + WR STALL + WR SLOW
// + AWR bias) / WR Beats
// }}}
// Skip the boring stuff (if using VIM folding)
// {{{
// Write-channel orthogonal counters.
//
// While triggered, each clock cycle is classified by the casez below and
// at most one counter is incremented.  Every counter incremented below is
// given an initial value and is cleared exactly once by a reset or clear
// request, so that no count survives a clear.
initial	wr_data_lag = 0;
initial	wr_idle_cycles = 0;
initial	wr_b_lag_count = 0;
initial	wr_b_stall_count = 0;
initial	wr_slow_data = 0;
initial	wr_stall = 0;
initial	wr_early_beat = 0;
initial	wr_awr_early = 0;
// initial wr_aw_burst = 0;
initial	wr_addr_stall = 0;
initial	wr_addr_lag = 0;
initial	wr_early_stall = 0;
always @(posedge S_AXI_ACLK)
if (!S_AXI_ARESETN || clear_request)
begin
	wr_data_lag <= 0;
	wr_idle_cycles <= 0;
	wr_b_lag_count <= 0;
	wr_b_stall_count <= 0;
	wr_slow_data <= 0;
	wr_stall <= 0;
	wr_early_beat <= 0;
	wr_awr_early <= 0;
	wr_addr_stall <= 0;
	wr_addr_lag <= 0;
	wr_early_stall <= 0;
end else if (triggered)
// }}}
// Selector bit order, MSB first:
//	[8] !wr_aw_zero_outstanding	[7] !wr_w_zero_outstanding
//	[6] wr_in_progress
//	[5] AWVALID	[4] AWREADY
//	[3] WVALID	[2] WREADY
//	[1] BVALID	[0] BREADY
// Items are tested in the order written and the first match wins, so the
// order of the items below is significant.
casez({ !wr_aw_zero_outstanding, !wr_w_zero_outstanding,
		wr_in_progress,
		M_AXI_AWVALID, M_AXI_AWREADY,
		M_AXI_WVALID, M_AXI_WREADY,
		M_AXI_BVALID, M_AXI_BREADY })
9'b0000?0???: wr_idle_cycles <= wr_idle_cycles + 1;
// 9'b11?????11: begin end // BURST count
//
// Throughput measures
9'b1?1??0???: wr_slow_data <= wr_slow_data + 1;
9'b1????10??: wr_stall <= wr_stall + 1; // Stall #1
9'b0?1??10??: wr_stall <= wr_stall + 1; // Stall #2
//
9'b0??0?11??: wr_early_beat<= wr_early_beat + 1; // Before AWV
// 9'b1??0?11??: wr_beat <= wr_beat + 1;
// 9'b???1?11??: wr_beat <= wr_beat + 1;
//
// Lag measures
9'b000110???: wr_awr_early <= wr_awr_early + 1;
9'b000100???: wr_addr_stall <= wr_addr_stall + 1;
9'b100??0?0?: wr_data_lag <= wr_data_lag + 1;
9'b010??0???: wr_addr_lag <= wr_addr_lag + 1;
9'b0?1??0???: wr_addr_lag <= wr_addr_lag + 1;
9'b0?0??10??: wr_early_stall<= wr_early_stall+ 1;
9'b110??0?0?: wr_b_lag_count <= wr_b_lag_count + 1;
9'b110??0?10: wr_b_stall_count <= wr_b_stall_count + 1;
//
default: begin end
endcase