-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathUserManual_NRCS_M4.html
3988 lines (3986 loc) · 467 KB
/
UserManual_NRCS_M4.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>User Manual: USDA NRCS Multi-Model Machine-Learning Metasystem (M4)</title>
<meta name="author" content="Fleming, Sean - FPAC-NRCS, Portland, OR">
<style type="text/css"> * {margin:0; padding:0; text-indent:0; }
h1 { color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: bold; text-decoration: none; font-size: 28pt; }
h2 { color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: bold; text-decoration: none; font-size: 18pt; }
.s1 { color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: bold; text-decoration: none; font-size: 11.5pt; vertical-align: 5pt; }
.s2 { color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: normal; text-decoration: none; font-size: 11pt; }
h3 { color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: bold; text-decoration: none; font-size: 16pt; }
.s3 { color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: bold; text-decoration: none; font-size: 10.5pt; vertical-align: 5pt; }
.p, p { color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; margin:0pt; }
.s4 { color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 7pt; vertical-align: 3pt; }
h4 { color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: bold; text-decoration: none; font-size: 14pt; }
.s5 { color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: bold; text-decoration: none; font-size: 12pt; }
.s6 { color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: bold; text-decoration: none; font-size: 8pt; vertical-align: 4pt; }
.s7 { color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: bold; text-decoration: none; font-size: 9pt; vertical-align: 4pt; }
.s8 { color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 8pt; }
.s9 { color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 8pt; }
.s10 { color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 10pt; }
.s11 { color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: normal; text-decoration: none; font-size: 7pt; vertical-align: 3pt; }
.s13 { color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: normal; text-decoration: none; font-size: 2pt; }
.s14 { color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: normal; text-decoration: none; font-size: 12pt; }
.s15 { color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: normal; text-decoration: none; font-size: 8pt; vertical-align: 4pt; }
.s16 { color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
.s17 { color: black; font-family:"Times New Roman", serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
.s18 { color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 9pt; }
.s19 { color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 9pt; }
.s20 { color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
li {display: block; }
#l1 {padding-left: 0pt;counter-reset: c1 1; }
#l1> li>*:first-child:before {counter-increment: c1; content: counter(c1, decimal)" "; color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: bold; text-decoration: none; font-size: 16pt; }
#l1> li:first-child>*:first-child:before {counter-increment: c1 0; }
#l2 {padding-left: 0pt;counter-reset: c2 1; }
#l2> li>*:first-child:before {counter-increment: c2; content: counter(c1, decimal)"."counter(c2, decimal)" "; color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: bold; text-decoration: none; font-size: 14pt; }
#l2> li:first-child>*:first-child:before {counter-increment: c2 0; }
#l3 {padding-left: 0pt;counter-reset: d1 1; }
#l3> li>*:first-child:before {counter-increment: d1; content: counter(d1, decimal)". "; color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l3> li:first-child>*:first-child:before {counter-increment: d1 0; }
#l4 {padding-left: 0pt;counter-reset: c3 1; }
#l4> li>*:first-child:before {counter-increment: c3; content: counter(c1, decimal)"."counter(c2, decimal)"."counter(c3, decimal)" "; color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: bold; text-decoration: none; font-size: 12pt; }
#l4> li:first-child>*:first-child:before {counter-increment: c3 0; }
#l5 {padding-left: 0pt; }
#l5> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l6 {padding-left: 0pt; }
#l6> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l7 {padding-left: 0pt; }
#l7> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l8 {padding-left: 0pt;counter-reset: c3 1; }
#l8> li>*:first-child:before {counter-increment: c3; content: counter(c1, decimal)"."counter(c2, decimal)"."counter(c3, decimal)" "; color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: bold; text-decoration: none; font-size: 12pt; }
#l8> li:first-child>*:first-child:before {counter-increment: c3 0; }
#l9 {padding-left: 0pt;counter-reset: c2 1; }
#l9> li>*:first-child:before {counter-increment: c2; content: counter(c1, decimal)"."counter(c2, decimal)" "; color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: bold; text-decoration: none; font-size: 14pt; }
#l9> li:first-child>*:first-child:before {counter-increment: c2 0; }
#l10 {padding-left: 0pt;counter-reset: h1 1; }
#l10> li>*:first-child:before {counter-increment: h1; content: counter(h1, decimal)". "; color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l10> li:first-child>*:first-child:before {counter-increment: h1 0; }
#l11 {padding-left: 0pt;counter-reset: c3 1; }
#l11> li>*:first-child:before {counter-increment: c3; content: counter(c1, decimal)"."counter(c2, decimal)"."counter(c3, decimal)" "; color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: bold; text-decoration: none; font-size: 12pt; }
#l11> li:first-child>*:first-child:before {counter-increment: c3 0; }
#l12 {padding-left: 0pt; }
#l12> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l13 {padding-left: 0pt; }
#l13> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l14 {padding-left: 0pt; }
#l14> li>*:first-child:before {content: "o "; color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l15 {padding-left: 0pt; }
#l15> li>*:first-child:before {content: "o "; color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l16 {padding-left: 0pt;counter-reset: c4 1; }
#l16> li>*:first-child:before {counter-increment: c4; content: counter(c1, decimal)"."counter(c2, decimal)"."counter(c3, decimal)"."counter(c4, decimal)" "; color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: normal; text-decoration: none; font-size: 12pt; }
#l16> li:first-child>*:first-child:before {counter-increment: c4 0; }
#l17 {padding-left: 0pt; }
#l17> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l18 {padding-left: 0pt; }
#l18> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l19 {padding-left: 0pt; }
#l19> li>*:first-child:before {content: "o "; color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l20 {padding-left: 0pt; }
#l20> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l21 {padding-left: 0pt; }
#l21> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l22 {padding-left: 0pt; }
#l22> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l23 {padding-left: 0pt; }
#l23> li>*:first-child:before {content: "o "; color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l24 {padding-left: 0pt; }
#l24> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l25 {padding-left: 0pt; }
#l25> li>*:first-child:before {content: "o "; color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l26 {padding-left: 0pt; }
#l26> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l27 {padding-left: 0pt; }
#l27> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l28 {padding-left: 0pt; }
#l28> li>*:first-child:before {content: "o "; color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l29 {padding-left: 0pt; }
#l29> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l30 {padding-left: 0pt; }
#l30> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l31 {padding-left: 0pt; }
#l31> li>*:first-child:before {content: "o "; color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l32 {padding-left: 0pt; }
#l32> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l33 {padding-left: 0pt; }
#l33> li>*:first-child:before {content: "o "; color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l34 {padding-left: 0pt; }
#l34> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l35 {padding-left: 0pt; }
#l35> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l36 {padding-left: 0pt; }
#l36> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l37 {padding-left: 0pt; }
#l37> li>*:first-child:before {content: "o "; color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l38 {padding-left: 0pt; }
#l38> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l39 {padding-left: 0pt; }
#l39> li>*:first-child:before {content: "o "; color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l40 {padding-left: 0pt; }
#l40> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l41 {padding-left: 0pt; }
#l41> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l42 {padding-left: 0pt; }
#l42> li>*:first-child:before {content: "o "; color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l43 {padding-left: 0pt; }
#l43> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l44 {padding-left: 0pt;counter-reset: c3 1; }
#l44> li>*:first-child:before {counter-increment: c3; content: counter(c1, decimal)"."counter(c2, decimal)"."counter(c3, decimal)" "; color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: bold; text-decoration: none; font-size: 12pt; }
#l44> li:first-child>*:first-child:before {counter-increment: c3 0; }
#l45 {padding-left: 0pt;counter-reset: c4 1; }
#l45> li>*:first-child:before {counter-increment: c4; content: counter(c1, decimal)"."counter(c2, decimal)"."counter(c3, decimal)"."counter(c4, decimal)" "; color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: normal; text-decoration: none; font-size: 12pt; }
#l45> li:first-child>*:first-child:before {counter-increment: c4 0; }
#l46 {padding-left: 0pt; }
#l46> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l47 {padding-left: 0pt; }
#l47> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l48 {padding-left: 0pt; }
#l48> li>*:first-child:before {content: "o "; color: black; font-family:"Courier New", monospace; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l49 {padding-left: 0pt; }
#l49> li>*:first-child:before {content: " "; color: black; font-family:Wingdings; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l50 {padding-left: 0pt;counter-reset: c4 1; }
#l50> li>*:first-child:before {counter-increment: c4; content: counter(c1, decimal)"."counter(c2, decimal)"."counter(c3, decimal)"."counter(c4, decimal)" "; color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: normal; text-decoration: none; font-size: 12pt; }
#l50> li:first-child>*:first-child:before {counter-increment: c4 0; }
#l51 {padding-left: 0pt; }
#l51> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
#l52 {padding-left: 0pt;counter-reset: c2 1; }
#l52> li>*:first-child:before {counter-increment: c2; content: counter(c1, decimal)"."counter(c2, decimal)" "; color: black; font-family:Calibri, sans-serif; font-style: normal; font-weight: bold; text-decoration: none; font-size: 14pt; }
#l52> li:first-child>*:first-child:before {counter-increment: c2 0; }
#l53 {padding-left: 0pt;counter-reset: c3 1; }
#l53> li>*:first-child:before {counter-increment: c3; content: counter(c1, decimal)"."counter(c2, decimal)"."counter(c3, decimal)" "; color: black; font-family:Calibri, sans-serif; font-style: italic; font-weight: bold; text-decoration: none; font-size: 12pt; }
#l53> li:first-child>*:first-child:before {counter-increment: c3 0; }
li {display: block; }
#l54 {padding-left: 0pt; }
#l54> li>*:first-child:before {content: " "; color: black; font-family:Symbol, serif; font-style: normal; font-weight: normal; text-decoration: none; font-size: 11pt; }
table, tbody {vertical-align: top; overflow: visible; }
</style>
</head>
<body>
<h1 style="padding-left: 8pt;text-indent: 0pt;line-height: 32pt;text-align: left;">User Manual</h1>
<h2 style="padding-top: 10pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">
US Department of Agriculture Snow Survey and Water Supply Forecasting Program Multi-Model Machine-learning Metasystem (M
<span class="s1">4</span>
)
</h2>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
<p class="s2" style="padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">First complete draft 10 March 2023, S.W. Fleming, NRCS National Water and Climate Center<br>Last modified 5 May 2023, S.W. Fleming, NRCS National Water and Climate Center</p>
<p class="s2" style="padding-top: 7pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">
For information contact Beau Uriona (
<a href="mailto:[email protected]" style=" color: #0562C1; font-family:Calibri, sans-serif; font-style: italic; font-weight: normal; text-decoration: underline; font-size: 11pt;">[email protected]</a>
) or Sean Fleming (
<a href="mailto:[email protected]" style=" color: #0562C1; font-family:Calibri, sans-serif; font-style: italic; font-weight: normal; text-decoration: underline; font-size: 11pt;">[email protected]</a>
) at the USDA Natural Resources Conservation Service
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
<ol id="l1">
<li data-list-text="1">
<h3 style="padding-left: 44pt;text-indent: -36pt;text-align: left;">
What is M
<span class="s3">4</span>
?
</h3>
<p style="padding-top: 9pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
The multi-model machine-learning metasystem (M
<span class="s4">4</span>
) is the prototype next-generation mathematical and software model for operational water supply forecasting (seasonal river flow volume prediction) built for and employed by the Snow Survey and Water Supply Forecasting (SSWSF) Program of the US Department of Agriculture (USDA) Natural Resources Conservation Service (NRCS), which spans close to 600 forecast locations across the American West. It will replace the linear principal component regression (PCR) method that the NRCS SSWSF Program, and in particular the National Water and Climate Center (NWCC), has used as its primary (but not sole) water supply forecasting (WSF) model in this West-wide NRCS operational role since the early 1990s. M
<span class="s4">4</span>
leverages a broadly PCR-like architecture and philosophy but augments it with multi-model ensemble modeling for improved forecast stability and accuracy, explainable and automated machine learning/artificial intelligence (ML/AI) for improved forecast accuracy, evolutionary computing to support global feature optimization, more advanced statistical methods for prediction uncertainty estimation under non-Gaussian and heteroscedastic error distributions, and other advances that were directly motivated by the day-to-day operational requirements of a governmental service-delivery organization performing operational water supply forecasting at scale. It is written in the free, open-source R scientific/statistical computing language.
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="2">
<h3 style="padding-left: 44pt;text-indent: -36pt;text-align: left;">Disclaimer</h3>
<p style="padding-top: 9pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
The NWCC posts the M
<span class="s4">4</span>
code publicly here for four reasons: (a) to satisfy internal NRCS archiving, access, and version-control needs around the code and its documentation, including instructions for its use; (b) to demonstrate full transparency in the techniques used by the NRCS SSWSF Program, and in so doing, and in conjunction with extensive vetting of the M
<span class="s4">4</span>
methodology and performance in the peer-reviewed science and engineering literature, to support the technical credibility of the program and public confidence in its products; (c) as a practical mechanism for sharing M
<span class="s4">4</span>
with partner agencies and organizations; and (d) to facilitate and encourage further refinement and development of M
<span class="s4">4</span>
by making it available as open-source code to the STEM research community.
</p>
<p style="padding-top: 7pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Any purpose to which this code is put by external users downloading M
<span class="s4">4</span>
from this site is entirely at the discretion of those users, who accept full responsibility for such uses and their consequences, which are in general beyond our knowledge or control. M
<span class="s4">4</span>
is an operational prototype, and while it has undergone successful testing, much of which has been documented in the peer-reviewed science and engineering literature, the possibility of bugs or errors cannot be ruled out (see also Section 6 below).
</p>
<p style="padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Note that operational forecasts officially issued by the NRCS under a specific set of user protocols and institutional controls, irrespective of whether those forecasts were or were not developed in part using guidance from M
<span class="s4">4</span>
, are distinct from the predictions and performance of the M
<span class="s4">4</span>
code itself as posted here to GitHub and for which no claims are made as to accuracy or value. Additionally, all environmental analysis and prediction software, including M
<span class="s4">4</span>
, has inherent limitations and idiosyncrasies. The characteristics of the prototype M
<span class="s4">4</span>
probabilistic nonlinear regression metasystem are such that it may be useful for purposes other than the WSF applications for which it was designed, but while we applaud exploration of such additional applications, we emphasize that absolutely no testing of any such applications has been undertaken by the NRCS and no claims as to its performance in or suitability for such applications are made. No warranties, guarantees, or any other assurances whatsoever around M
<span class="s4">4</span>
and its results in any application, WSF-related or not, are provided. Users who believe they have discovered a problem are encouraged to report it to the NRCS.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Users of M
<span class="s4">4</span>
should read and understand this user manual in its entirety, including the externally peer-reviewed journal articles in its Appendices which describe the M
<span class="s4">4</span>
prototype. Note also that while it is hoped that the current distribution of the M
<span class="s4">4</span>
prototype will be taken as a starting point for additional M
<span class="s4">4</span>
development, and there is no shortage of potential future directions that such further development might take, developers are responsible for their own results and should familiarize themselves thoroughly with the peer-reviewed literature on M
<span class="s4">4</span>
and with the code itself in its entirety: there are a lot of moving parts in M
<span class="s4">4</span>
, many of which are interconnected, especially as they relate to functionalities specifically directed to operational WSF environments and standard WSF use-cases and AutoML functionalities (see below). Commenting in the code is fairly ubiquitous, which may hopefully help scientific software developers decipher the intent and rationales of certain code snippets and the overall workflows in M
<span class="s4">4</span>
.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
While just about anyone is welcome to experiment with M
<span class="s4">4</span>
, please be aware that some required but not necessarily sufficient prerequisites for using M
<span class="s4">4</span>
correctly, using its results appropriately, and accurately understanding its nature and capabilities, are good familiarity with current standard procedures in statistical WSF in the western US as implemented by NRCS and other similar operational service-delivery organizations, solid experience with R or at a minimum other similar interpreted scientific computing languages, and a thorough understanding of the principles, goals, methods, and capabilities and limitations of M
<span class="s4">4</span>
as described in the peer-reviewed literature and this manual.
</p>
<p style="padding-top: 7pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">The code, as it is posted here, should be viewed as experimental – use at your own risk! See also the GitHub license agreement.</p>
</li>
<li data-list-text="3">
<h3 style="padding-top: 2pt;padding-left: 44pt;text-indent: -36pt;text-align: left;">
A synopsis of M
<span class="s3">4</span>
philosophy, context, and basic workflow
</h3>
<p style="padding-top: 9pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Full explanations of M
<span class="s4">4</span>
and issues motivating its development and design choices are provided in the Appendices, which users should read and digest. The following is a short summary of the general context, philosophy, and workflows associated with the new NRCS forecasting metasystem.
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
<ol id="l2">
<li data-list-text="3.1">
<h4 style="padding-left: 43pt;text-indent: -36pt;text-align: left;">Design criteria</h4>
<p style="padding-top: 9pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
M
<span class="s4">4</span>
did not transition from research to operations (R2O) per se. Rather, a more pragmatic applied-science and engineering paradigm was used: operational requirements were first defined, and M
<span class="s4">4</span>
was then designed and built to meet those requirements. These specific design criteria for operational WSF at NRCS, many of which are applicable more generally to operational hydrologic forecasting at other service-delivery organizations, included:
</p>
<ol id="l3">
<li data-list-text="1.">
<p style="padding-top: 7pt;padding-left: 43pt;text-indent: -18pt;text-align: left;">Improved forecast accuracy</p>
</li>
<li data-list-text="2.">
<p style="padding-top: 1pt;padding-left: 43pt;text-indent: -18pt;text-align: left;">Improved potential for automation (largely an “over-the-loop” system)</p>
</li>
<li data-list-text="3.">
<p style="padding-top: 1pt;padding-left: 44pt;text-indent: -18pt;text-align: left;">Relatively low cost & good ease of development, implementation, and operation</p>
</li>
<li data-list-text="4.">
<p style="padding-top: 1pt;padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">Address known technical issues with the existing system by seamlessly accommodating nonlinear functional forms, and heteroscedastic and non-Gaussian residuals requiring time-varying & asymmetric prediction intervals, without the use of predictand transforms</p>
</li>
<li data-list-text="5.">
<p style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">Flexible, modular, expandable: relatively easy ability to integrate emerging new techniques as appropriate</p>
</li>
<li data-list-text="6.">
<p style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">Physics-aware AI: hydrologic theory-guided system, generating geophysically explainable forecasts</p>
</li>
<li data-list-text="7.">
<p style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">Multi-model ensemble paradigm: address equifinality and model selection uncertainty, with robustness over diverse geophysical environments across the US West and Alaska</p>
</li>
<li data-list-text="8.">
<p style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">Accommodate high-dimensional predictor datasets & potential for multiple independent input signals: will only grow more important with increasing use of highly spatially distributed input datasets such as those from remote sensing or climate modeling products</p>
</li>
<li data-list-text="9.">
<p style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">Balance innovation and performance gains vs. established building blocks and proven tools: apply most advanced yet acceptable (MAYA) industrial design principle.</p>
</li>
</ol>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="3.2">
<h4 style="padding-left: 43pt;text-indent: -36pt;text-align: left;">MAYA</h4>
<p style="padding-top: 9pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">The MAYA principle was introduced by the renowned French-American industrial designer, Raymond Loewy. In the context of a next-generation WSF system, MAYA signifies a compromise between adopting the most advanced techniques coming out of the research community, which offer great promise but remain largely unproven in real-world high-stakes applications having strong organizational accountabilities, and established technologies and practices that have proven themselves and achieved buy-in from the water resource science, engineering, and management communities but may not represent the best-performing methods now available. The goal is a unique new mission-specific prediction model, using a carefully selected suite of up-to-date methods from the geophysical modeling and data science communities, yet one that also corresponds to a ‘hot-rodded’ version of the proven and accepted PCR-based approach to WSF.</p>
<ol id="l4">
<li data-list-text="3.2.1">
<p class="s5" style="padding-top: 7pt;padding-left: 44pt;text-indent: -36pt;text-align: left;">
Synopsis of some conventional elements of M
<span class="s6">4</span>
</p>
<p style="padding-top: 9pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">
The conventional elements of M
<span class="s4">4</span>
include a PCR-like structure, adoption of the canonical use-case for statistical WSF in the western US, quantitative estimation of forecast uncertainty in the form of prediction intervals, incorporation of individually well-proven techniques, and an emphasis on the ability to easily generate a relatable storyline for any given forecast on any given day.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Linear PCR was introduced to operational WSF by the NRCS in the early 1990s as a replacement for earlier linear regression methods. Its primary advantage is that it provides a theoretical and practical framework for performing linear regression with multicollinear predictors. The basic workflow of PCR involves performing a principal component analysis (PCA) on the input data matrix, and using one or more of the resulting modes, starting with the leading mode, as predictors in an otherwise standard linear regression to predict seasonal river flow volume. Because inputs are mainly measurements of snow water equivalent (SWE) and other similar hydroclimatic parameters, the leading PCA mode effectively serves as an index of watershed-scale hydroclimatic conditions. Higher modes may be retained, reflecting more subtle hydroclimatic patterns, or capturing other processes like aquifer-stream interactions if antecedent streamflow is additionally included as a predictor. Out-of-sample prediction performance is normally found as the leave-one-out cross-validated (LOOCV; sometimes called jackknifed though strictly speaking this refers to something slightly different) standard error, root mean square error, coefficient of determination, or similar metric. This LOOCV procedure is standard practice in statistical WSF systems deployed operationally by service-delivery organizations in western North America, and larger cross-validation folds are generally not required because, while many hydroclimatic variables are serially correlated, the residuals from WSF regression models typically demonstrate little or no statistically significant autocorrelation at the annual timescale considered in the canonical western US statistical WSF use-case (see below; see also examples given later in this manual, and in the M
<span class="s4">4</span>
distribution files; the appendices also provide more information). That cross-validated performance measure then provides the basis for forming prediction intervals, expressed for example in NRCS practice as 90%, 70%, 30%, and 10% exceedance flows. In particular, a simple but common heuristic is used: the probability density function for model error is taken to be a stationary Gaussian (normal) distribution, with a mean equal to that deterministic prediction and a standard deviation equal to the LOOCV standard error or more-or-less equivalently the root mean square error, and the exceedance flows are estimated as the 0.10, 0.30, 0.70, and 0.90 quantiles of that distribution. The entire workflow may be wrapped in an automated system for identifying optimal predictors and PCA modes to retain.
</p>
<p style="padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
The foregoing processes have been implemented in many software systems over the years, including the VIPER program used operationally by NRCS. This proven overall procedure was in many, but not all, respects largely replicated in M
<span class="s4">4</span>
, which augments it with the use of multi-model ensemble modeling, a genetic algorithm for optimal feature extraction, and more sophisticated and flexible methods for estimating prediction uncertainty.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">The canonical use-case for statistical WSF in the western US involves a single predictand, which is the upcoming spring-summer flow volume at a given location on a given river; predictors consisting mainly of current SWE and year-to-date precipitation at various locations in and near the watershed, and possibly antecedent streamflow or an ENSO index; and a different model for each forecast point-publication date-target period triplet. To elaborate, operational statistical WSF models at practical service-delivery organizations (SDOs) having strong accountabilities for reliably producing and publicly disseminating forecast information are organized around solving a specific forecasting problem (SFP). An SFP is a unique combination of annual publication date, target period, and forecast point.</p>
<p style="padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">Publication date is the day of the year that the forecast is issued. Common examples are January 1, February 1, March 1, April 1, May 1, or June 1, but others are possible depending on the particular river, reflecting local hydroclimatic characteristics and water management concerns. The target period defines the upcoming seasonal timeframe for which accumulated river runoff volume is being forecasted; a prevalent example is April-July but others are commonly used as well, again depending on the particular river, reflecting local hydroclimatic characteristics and water management concerns. The forecast point is a particular location on a particular river for which a runoff forecast is being made.</p>
<p style="padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
There may be more than one forecast point on a given river, especially long rivers; but each forecast point corresponds to a separate SFP. An example of an SFP is a water supply forecast, issued on February 1 (publication date), of the total April-July flow volume (target period) of the Deschutes River below Snow Creek (forecast point). A unique model is built for each SFP. However, there may be a great deal of overlap between models for similar SFPs. For example, the January 1 and April 1 publication-date models for the Deschutes River below Snow Creek may use updated versions of the same predictors, such as SWE at some SNOTEL station on January 1 and on April 1, respectively. However, the relevance of certain predictors may also change over the course of the forecast season reflecting seasonal hydroclimatic processes, so the optimal predictor set may gradually change as well; for example, ENSO indices, in regions where they are useful, tend to provide much of their value relatively early in the forecast season when they provide some information about upcoming winter-spring rainfall and snowfall. M
<span class="s4">4</span>
was built for this canonical use-case, albeit with an eye to making better use of emerging predictors.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Western water management can be a very high-stakes enterprise, and the water managed using WSFs may have values ranging into the billions or trillions of dollars every year. While this militates on the one hand for using the best available prediction technologies, responsibilities around sound decision-making, and the economic and political ramifications of making bad choices, also call for a certain level of prudence and conservatism. This risk-aversion leads to a reliance on proven methods. M
<span class="s4">4</span>
employs a proven PCR-like architecture and adheres to the canonical WSF use-case, as described above, and its constituent elements consist of methods that have individually been widely tested and accepted in physical, statistical, and machine learning applications.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Similarly, most water resource scientists, engineers, and managers hold low trust in predictions that are not geophysically explainable. A readily identifiable and relatable ‘storyline’ for any given forecast on any given day is, in practice, a necessary ingredient of any operational WSF modeling system at an SDO. This typically consists of a general explanation of why a certain forecast was obtained, in light of well-understood terrestrial hydrologic processes and current watershed-scale climatic (snowpack, soil moisture, etc.) conditions. Producing such a physical explanation of the forecast does not necessarily require a process simulation-based hydrologic model. Use of PCR by well-trained operational hydrologists, for example, has produced clear hydroclimatic storylines for decades. The ability to continue this forward into the machine-learning world was one of the most important and challenging design goals for M
<span class="s4">4</span>
.
</p>
</li>
<li data-list-text="3.2.2">
<p class="s5" style="padding-top: 7pt;padding-left: 44pt;text-indent: -36pt;text-align: left;">Synopsis of some fresh elements</p>
<p style="padding-top: 3pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
New elements in M
<span class="s4">4</span>
include a multi-model ensemble forecasting framework, the first wide-scale incorporation of AI into a governmental SDO operational WSF system, evolutionary computing, and advanced statistical prediction uncertainty estimation.
</p>
<p style="padding-top: 8pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
The final product of M
<span class="s4">4</span>
is a multi-model ensemble mean forecast. M
<span class="s4">4</span>
consists of six semi-independent forecast modeling systems acting in parallel and may be viewed as a system of systems (hence ‘metasystem’). Each of the constituent modeling systems has a PCR-like architecture as noted above but replaces simple linear regression and prediction intervals based on stationary normal error distributions with various advanced statistical or machine learning algorithms. The results of all six are averaged together to create the ensemble mean forecast. Advantages of this approach are that it deals with model equifinality, that is, similar overall performance from several quite different models, and provides more stable, and in some cases, more accurate results than any of the individual models alone. It is a standard operational approach in numerical weather prediction and climate modeling, it is very well-established in applied hydrologic research using process-based models, and it is used within some machine learning approaches, such as bootstrap aggregation (bagging) within some neural networks.
</p>
<p style="padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">However, multi-model ensembles have not, in general, migrated into operational river forecasting systems at SDOs (although related but simpler concepts like extended (or ensemble) streamflow prediction (ESPs), which use a single hydrologic model with multiple climate traces, made the jump decades ago). We emphasize that using the results of a multi-model hydrologic prediction system wisely can require operating in a different headspace than some operational hydrologists may be accustomed to. For example, hunting-and-pecking for which of the six constituent models works ‘best’ for some particular combination of forecast point, publication date, and target period misses the point of the exercise and can lead to wasted time and ultimately, overall, an underperforming modeling strategy.</p>
<p style="padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">More background on this sometimes-counterintuitive approach can be found in the Appendices, and the publications cited in the Appendices.</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">Research using AI for river forecasting dates back to 1995 or possibly earlier, and its superior prediction accuracy has been proven repeatedly. Mainstreaming of AI into wider and operational hydrologic applications has been much more recent and limited due to various stumbling blocks. These included a lack of prediction uncertainty intervals; a lack of explainability, i.e., the so-called black-box problem; skepticism in high-stakes operational settings where existing methods are well-established and new methods coming out of the research community are often viewed as lacking real-world relevance and credibility; and misunderstandings and lack of technical training and professional familiarity among many physical scientists in both the research and operational communities regarding AI and its real limitations and strengths. But things are changing, as almost everyone is getting used to the idea of AI, machine learning tools have become far more accessible and methods are far more diversified, and explainability, overtraining, prediction uncertainty estimation, and other questions can be addressed with appropriate AI design and implementation choices. WSF applications of AI, however, still have to be approached carefully and with the goal of satisfying specific user needs, rather than throwing machine learning at a problem and expecting the result to be useful and accepted.</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Accomplishing this, as noted above, was a key goal when developing M
<span class="s4">4</span>
. The approach and results are documented in the Appendices, and some cursory summaries are provided in the following subsections.
</p>
<p style="padding-top: 2pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">Before forging ahead, it’s probably also useful to provide a glossary of some basic machine learning jargon, bearing in mind however that these terms are evolving quickly and may be used in slightly different ways in different fields:</p>
<ul id="l5">
<li data-list-text="">
<p class="s2" style="padding-top: 7pt;padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">
Data science
<span class="p">Algorithms for building data-based information/knowledge pipelines, often to enable automated predictions and actions; spans traditional statistical modeling and AI</span>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">
Data mining
<span class="p">Extracting descriptive, explanatory, or predictive patterns from data, typically without clear a priori expectations around specific causal relationships</span>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
Big data
<span class="p">Satisfy three Vs of volume, velocity, and variety: gigantic and never-ending torrents of data having a wide & unpredictable content range (e.g., YouTube uploads). Strictly speaking, few environmental datasets satisfy this definition, but big-data methods can nevertheless be useful. A fourth V – veracity – is also sometimes included.</span>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">
Artificial intelligence (AI)
<span class="p">Technologies that emulate human intelligence in various ways, in turn including several subdisciplines, like machine learning and robotics</span>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">
Machine learning (ML)
<span class="p">An AI field using algorithms to identify patterns in data and, typically, applying those patterns to make predictions; divided into classification or regression, and unsupervised or supervised methods; examples include a wide variety of artificial neural network (ANN) types, random forests (RFs), support vector machines (SVMs), and deep learning (DL) architectures like long short-term memory (LSTM) networks and convolutional neural networks (CNNs); these algorithms are often inspired by biological systems, such as the human brain (ANNs) or human decision-making processes (the decision trees in RFs)</span>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">
Hyperparameters
<span class="p">AIs have parameters loosely analogous to coefficients in a linear statistical model (e.g., a neural network’s neuron weights) that are optimized in training; but AIs additionally have higher-level hyperparameters (e.g., a neural network’s learning rate) that control this process of estimating parameter values and the overall architecture</span>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">
Features and targets
<span class="p">Akin to predictors and predictands, respectively, in traditional statistical modeling; features engineering is the procedure of processing/manipulating input data to extract and select features to be passed to the AI as predictor variables, a major element of AI application; one of the goals of deep learning is to integrate features engineering into the core of the machine learning algorithm, avoiding the need to perform features engineering explicitly; conversely, user-directed features engineering has been identified as a type of physics-informed machine learning</span>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">
AutoML
<span class="p">System to automatically build the best AI for a given dataset; may include optimal hyperparameter selection and is intended to make ML easier for non-AI specialists to apply in respective practice domains; also creates the need for still-higher-level ‘hyperhyperparameters’</span>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">
Overtraining and regularization
<span class="p">Overtraining is memorization of the training data, compromising generalization accuracy; it happens in all models involving calibration or derivation of parameters from observational data, not just ML, including physics-based and conventional linear statistical models, but it can be particularly acute for ML due to the tremendous fitting ability of machine learning algorithms; however, it is easily detectable in validation; regularization refers to technical methods that mitigate it.</span>
</p>
</li>
</ul>
<p style="padding-top: 7pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Evolutionary computing is also used in M
<span class="s4">4</span>
. This refers to a family of computing techniques that emulate biological evolutionary processes. M
<span class="s4">4</span>
deploys a genetic algorithm, which is a type of optimization procedure, for optimizing the features (specifically, the combination of input predictor variables and PCA modes) fed to its constituent modeling systems. The goal is identical to techniques often conventionally used for optimizing these choices, such as the tree algorithm used in the NRCS VIPER implementation of PCR, but the methods it uses are more advanced. Of particular note is that it is a stochastic optimization scheme, which helps avoid having the solution process get trapped in local minima in a complex nonlinear error space, contributing to a greater likelihood of obtaining a global (true) optimum.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Considered in aggregate, the foregoing attributes mean that M
<span class="s4">4</span>
can be viewed as a community of artificial lifeforms. Each of these lifeforms consists of an artificial intelligence, and following rules adapted from genetics, each evolves over a series of generations in such a way as to perform the best it can in its environment. Ultimately, this community of six artificial lifeforms reaches a consensus decision on the best WSF and how much certainty it collectively has in that decision.
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
</ol>
</li>
<li data-list-text="3.3">
<h4 style="padding-left: 43pt;text-indent: -36pt;text-align: left;">Explainable machine learning</h4>
<p style="padding-top: 9pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Several pragmatic, somewhat WSF-specific steps were taken in M
<span class="s4">4</span>
design to dovetail machine learning with the underlying process physics of watershed hydrology:
</p>
<ul id="l6">
<li data-list-text="">
<p class="s2" style="padding-top: 7pt;padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">
Operational hydrologist-based features engineering
<span class="p">User-directed selection of predictive data, and how those data are compressed and fed to supervised learning algorithms, reflects end-user knowledge around representativeness, reliability, quirks, and capabilities of potential input variables and measurement sites and geophysical interpretations of PCA modes. That the hydrologist should pick what data to use and how to process them may seem obvious, but it is somewhat of a counterpoint to some big data-focused deep learning approaches, where large datasets are fed to complex multi-layer neural networks that, in a nutshell, perform features engineering automatically. It is a key location in the AI development process for domain experts to insert their physical hydrologic knowledge.</span>
</p>
</li>
<li data-list-text="">
<p style="padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
<i>A priori physicality constraints.</i>
Some feature-target relationships are known to be significantly nonlinear but monotonic. For example, a high-snow winter, all other things being equal, won’t produce a low-flow spring and summer. Certain AI methods allow a monotonicity constraint to be enforced. While overall monotonicity of the ensemble mean relationship is not strictly enforced, M
<span class="s4">4</span>
strongly encourages that net behavior, as two of its six methods are intrinsically monotonic and another two have the option, which is activated in the default application of M
<span class="s4">4</span>
, of enforcing monotonicity constraints. Similarly, negative-valued volume predictions are non-physical yet can happen in some prediction systems, and certain AI methods allow a non-negativity constraint to be enforced. Several such methods are used here. Moreover, the use of nonlinear ML techniques tends to avoid the generation of negative-valued best-estimate predictions. M
<span class="s4">4</span>
also includes algorithmic logic to sequentially prune ensemble members if the 90% exceedance flow in the ensemble mean forecast distribution violates the nonnegativity constraint for any sample in the training interval, though this does not necessarily guarantee that a forecast going forward is strictly nonnegative. Note that monotonicity and nonnegativity constraints are simple forms of theory-guided data science, and that by severely restricting the solution space available to the MLs, such geophysical plausibility constraints improve both regularization and explainability.
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-top: 3pt;padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
WSF is purposefully framed as a low-dimensional problem with a parsimonious solution
<span class="p">. PCA pre-processing dramatically reduces problem dimensionality, and the predictand is a simple scalar. Additionally, considerable effort and testing were invested in identifying default hyperparameters and AutoML algorithms that help create the most parsimonious ML architecture possible for the canonical statistical WSF use-case. The result is, in most such applications, an extremely compact ML problem with one or two inputs and a single output. The relationships between those inputs and outputs can be easily visualized in an x-y or contour plot, allowing the user to see what the ML is thinking. Another advantage of low-dimensional features and targets and compact architectures (e.g., minimizing the number of hidden-layer</span>
</p>
</li>
</ul>
<p style="padding-left: 43pt;text-indent: 0pt;line-height: 108%;text-align: left;">neurons) is that it reduces the number of parameters that must be trained, making the ML more amenable to application using small sample sizes.</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="3.4">
<h4 style="padding-left: 43pt;text-indent: -36pt;text-align: left;">AutoML</h4>
<p style="padding-top: 9pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
While completely hands-off operation of a WSF system is in general not desired by SDOs, streamlining and automation of many tasks is, both in the interest of operational efficiency and to make a system like M
<span class="s4">4</span>
accessible to users whose expertise lies in areas other than AI. This leads to judicious use of automatic, or autonomous, machine learning (commonly dubbed AutoML). Its use dovetails with “over-the-loop” hydrometeorological forecasting concepts. In particular:
</p>
<ul id="l7">
<li data-list-text="">
<p style="padding-top: 7pt;padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">Algorithmic logic is introduced to automate most optimization/decision points (including ML hyperparameters). However, manual overrides remain available.</p>
</li>
<li data-list-text="">
<p style="padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">Other hyperparameters are set to robust default values on the basis of extensive experimentation using various NRCS WSF test cases and problem setups. For example, hundreds of metasystem runs were completed to locate values for the population size and the number of generations in the genetic algorithm that work as generally serviceable defaults for the canonical statistical WSF use-case.</p>
</li>
</ul>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">The ability to override these defaults and AutoML algorithms is also considered important at SDOs, providing hydrologists with an opportunity to make adjustments in accordance with their professional judgement.</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="3.5">
<h4 style="padding-left: 43pt;text-indent: -36pt;text-align: left;">
M
<span class="s7">4</span>
development approach, process, and status
</h4>
<p style="padding-top: 9pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Why choose R? There were four reasons. First, it’s free for everyone. Second, the design criteria for M
<span class="s4">4</span>
included some very specific requirements around a priori physicality constraints and non-standard statistical properties of the forecast distributions, and to our knowledge, these are most readily, and perhaps exclusively, available in certain existing R packages previously written by others and freely available on the CRAN server. Third, by downloading RStudio (and R along with it) and installing the needed packages directly from within that GUI by a simple point-and-click process, everything needed to run M
<span class="s4">4</span>
can be obtained very easily by one-stop (free) shopping with very little or no system configuration on the user’s part, further improving access. And fourth, it’s one of the most widely used scientific computing languages, and therefore likely to be familiar to a relatively wide range of users.
</p>
<p style="padding-top: 2pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">Basically, it came down to a question of some specific technical requirements plus general accessibility.</p>
<p style="padding-top: 7pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">
The M
<span class="s4">4</span>
development process began in early 2017 with a partnership between NRCS and White Rabbit R&D LLC facilitated through Elyon International Inc. Over the next three years, through a collaborative and consultative process, the existing PCR-based VIPER system at NRCS was evaluated, alternative approaches were explored under the constraint that only data-driven methods were to be considered, the attractiveness of developing a new system was identified along with its desired characteristics, and an initial research prototype was jointly developed with some preliminary testing, documentation, and external vetting through the engineering journal peer-review process. Work on M
<span class="s4">4</span>
since 2019 has been undertaken exclusively by NRCS and has included extensive retrospective testing, live operational testing for selected test sites during the 2020 and 2021 water supply forecast seasons, some further code debugging and refinement, and extensive documentation with additional external vetting through scientific journal peer-review processes, leading to the more refined and better-proven prototype archived here.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Considerable effort has also been undertaken by NRCS in partnership with a software development team at Colorado State University to build Persephone, a prototype production platform for large-scale operational deployment of M
<span class="s4">4</span>
. The existing M
<span class="s4">4</span>
code sits inside the Persephone wrapper. Persephone adopts a Software-as-a-Service (SaaS) framework residing on a private cloud. This enables increased levels of parallelization, multi-user server-based access for an operational forecast team, and a variety of enhanced user-focused practical capabilities: GUI, IT/database linkages, interactive capabilities around graphics, mapping and geospatial analysis, features engineering, data pre-processing and forecast distribution post-processing, and job-queuing, for example. A prototype of Persephone-M
<span class="s4">4</span>
became operational at NRCS on a limited testing and forecast process-augmentation basis during the 2023 water supply forecast season. Note that this GitHub distribution of the M
<span class="s4">4</span>
prototype does not include Persephone.
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="3.6">
<h4 style="padding-left: 43pt;text-indent: -36pt;text-align: left;">
General workflow of M
<span class="s7">4</span>
</h4>
<p style="padding-top: 9pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
There are two main steps in the M
<span class="s4">4</span>
WSF modeling process – building a model, and then running it in forecast operations – which closely mirror the steps in other hydrologic forecast models.
</p>
<ol id="l8">
<li data-list-text="3.6.1">
<p class="s5" style="padding-top: 7pt;padding-left: 44pt;text-indent: -36pt;text-align: left;">The build step</p>
<p style="padding-top: 9pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Training is the first step. It amounts to building a M
<span class="s4">4</span>
model suite, including data selection, setting some run control parameters, fitting the various constituent modeling systems within M
<span class="s4">4</span>
, and saving the outcomes. It can loosely be viewed as a geophysical model inversion process, in which model parameters are estimated from observational data. Training is akin to fitting a linear regression model in statistical WSF, or calibrating model parameters in a process-based model used for WSF.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
In general, and in brief, training involves the following processes within M
<span class="s4">4</span>
(see also foregoing sections in this manual and, in particular, the Appendices). As noted above, there are currently six semi-independent models in M
<span class="s4">4</span>
, each built around a different regression-type supervised learner. These are a monotone artificial neural network, a monotone composite quantile regression neural network, a
</p>
<p style="padding-top: 2pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
support vector machine (more specifically, support vector regression), random forests, linear quantile regression, and a standard linear regression as in conventional PCR. A sample of input variables is drawn from a pool of candidates, a principal component analysis (PCA) is performed on it, and principal component time series for a certain number of PCA modes – the features, in ML jargon – are fed to one of the six constituent models as predictor variables. That model is then trained by fitting parameters, and to some extent hyperparameters through AutoML processes, using that sample dataset. The accuracy of these intermediate results is assessed. A type of global optimization approach drawn from evolutionary computing, called a genetic algorithm, then efficiently iterates through various combinations of input variables and corresponding PCA modes, ultimately selecting the most accurate combination. This is repeated independently for each of the six constituent models. (As such, each of the constituent modeling systems is a variant on the PCR approach; for example, principal component support vector machine, which we abbreviate here as PCSVM.) When completed, the results from all the models – consisting of a best estimate, and prediction intervals (uncertainty estimates) expressed as 10%, 30%, 70%, and 90% exceedance volumes – are averaged together to create the ensemble mean prediction. Because river runoff volume cannot be negative, the final training-period ensemble is tested for that condition, and if needed, the results of individual models contributing most to the problem are removed one by one. In general, all of this is done internally and automatically by M
<span class="s4">4</span>
.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
From a user’s perspective, the procedure only involves providing a historical dataset to M
<span class="s4">4</span>
for the combination of forecast point, publication date, and target period that the model is being built for, along with some instructions to M
<span class="s4">4</span>
about how the user would like M
<span class="s4">4</span>
to run for that particular problem.
</p>
<p style="padding-top: 8pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
For the canonical use-case in statistical WSF as described above, the default instructions (this is explained more below) often don’t need to be modified in practice. In that case all the user needs to do is decide what predictor data they’d like to use – which SNOTEL snow and precipitation sites they want, maybe antecedent streamflow at the forecast point, perhaps an ENSO index thrown in for good measure, and so forth – just like in conventional statistical WSF. After the input data file containing that information is created by the user, which is an external process completed by the user in a text editor or Excel spreadsheet for instance, M
<span class="s4">4</span>
is then opened and run in R. For typical canonical statistical WSF use-case applications, depending on the computer and other factors, and using default M
<span class="s4">4</span>
parameters and settings including genetic algorithm-based feature optimization with a population size and number of generations found by experimentation to be generally suitable for such applications at a prototype level, the build run might take somewhere between 15 and 35 minutes or so. When it’s done, all important information is written to files by M
<span class="s4">4</span>
. This information includes the trained models.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
In general, training is performed only once, to create the trained suite of models and metadata describing them. Note that in usual operational WSF practice, however, models are updated roughly every five years or so by redeveloping them with newly acquired data. There may also be situations where the user would like to try creating a few different versions of a M
<span class="s4">4</span>
modeling suite, using different candidate input variables for example to test the usefulness of new kinds of predictive data, or giving the genetic algorithm in M
<span class="s4">4</span>
either more leeway, or less, to optimize features than in the default case.
</p>
</li>
<li data-list-text="3.6.2">
<p class="s5" style="padding-top: 7pt;padding-left: 44pt;text-indent: -36pt;text-align: left;">The forecast step</p>
<p style="padding-top: 9pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Creating operational forecasts using the previously trained model and new predictive input data is the second step in the M
<span class="s4">4</span>
WSF modeling process. It can loosely be viewed as a forward run of a geophysical
</p>
<p style="padding-top: 3pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
model developed through an inversion process. For example, in the initial build step, a M
<span class="s4">4</span>
ensemble modeling suite might first be trained to predict, on February 1, the April-July flow volume at some particular location on some particular river using, say, 30 years of historical SNOTEL data as input and observed April-July flow volumes over that same 30-year interval as the predictand. Then, in forecast operations, the user takes that trained modeling suite and runs it on February 1 of the current water year, using the present values for the SNOTEL stations, to create this year’s forecast of upcoming April-July flow volume. The user will do the same thing next water year, using the same old trained model and new values for the SNOTEL stations, when their February 1 forecast date rolls around again, and so forth. This is exactly like taking an existing fitted linear regression model in statistical WSF, or an existing calibrated process-simulation model in ESP-based WSF, and running it using new forcing data in order to issue a WSF for the current forecast season.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
From a user’s perspective, workflows for such forecast runs are in some sense much simpler than for training: the finished model and metadata around it, along with up-to-date data for the predictors, are all that’s needed. However, the user does need to set some slightly persnickety values in a run-control file; the user simply pulls that information from the output files of the build run (if the genetic algorithm was used for M
<span class="s4">4</span>
feature optimization during the build phase) or, even more simply, repeats information they provided to M
<span class="s4">4</span>
during the build run (if the user switched off genetic algorithm-based feature optimization during the build phase, which they have the option to do, as described below).
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
</ol>
</li>
</ol>
</li>
<li data-list-text="4">
<h3 style="padding-left: 44pt;text-indent: -36pt;text-align: left;">
How to use M
<span class="s3">4</span>
</h3>
<ol id="l9">
<li data-list-text="4.1">
<h4 style="padding-top: 9pt;padding-left: 43pt;text-indent: -36pt;text-align: left;">
M
<span class="s7">4</span>
installation
</h4>
<p style="padding-top: 9pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
The M
<span class="s4">4</span>
prediction analytics engine consists of 14 R script files, described in Section 4.2.1 below. These are a main program (MMPE-Main_MkII.R) plus 13 externally defined functions which perform principal component analysis, neural network modeling, genetic algorithm-based feature optimization, and so forth. The “MMPE” acronym refers to a previous name (multi-model prediction engine) given to an earlier development version of the M
<span class="s4">4</span>
metasystem.
</p>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Installation is straightforward and essentially the same as for more-or-less any R-based software, amounting to downloading the 14 script files and installing the required packages. M
<span class="s4">4</span>
is run by executing the main program, MMPE-Main_MkII.R.
</p>
<p style="padding-top: 7pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">
M
<span class="s4">4</span>
was developed, tested, and run in the RStudio freeware development environment on a Windows PC, and the following, more detailed, installation instructions are tailored to that case:
</p>
<ol id="l10">
<li data-list-text="1.">
<p style="padding-top: 7pt;padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">Download the 14 R script files (which are simply text files with a .R extension) from the GitHub repo. To keep things tidy, you'll presumably want to create a new subdirectory to place them in; you can name it whatever you like. All the model, input, and output files will be in the same folder. It’s good practice – especially when using RStudio, which can be less stable than running R in its basic console – to place this folder high up in the directory structure, to run it on a local hard drive rather than a cloud, and to use reasonably short folder names; a good choice could be something like C:\ThisWhereMyM4RunsAre\.</p>
</li>
<li data-list-text="2.">
<p style="padding-top: 2pt;padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
Download the 3 regular text files (i.e., with a .txt extension), which are input files, to the same location. MMPE_RunControlFile.txt is where the user specifies run options. The other text files are input data files (the sample input file provided here is for the Yellowstone River). As noted above, M
<span class="s4">4</span>
has two basic modes. In inverse or model-building mode, the AIs are trained, optimized, saved, etc., using the calibration-period observational data in MMPEInputData_ModelBuildingMode.txt. In forward or operational forecasting mode, the completed models are read in from files previously created by M
<span class="s4">4</span>
during the build phase, and are then run with a new sample of the input data, contained in MMPEInputData_ForecastingMode.txt.
</p>
</li>
<li data-list-text="3.">
<p style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">In Windows file explorer, navigate to this new subdirectory and double-click on the main program file, MMPE-Main_MkII.R, to open the prediction engine in RStudio. (Alternatively, open RStudio, then from the main menu bar at the top select File -> Open File and choose MMPE-Main_MkII.R.)</p>
</li>
<li data-list-text="4.">
<p style="padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
You'll have to install the non-default R packages used by the M
<span class="s4">4</span>
main program and function files. This is done in RStudio by going to its bottom right window, selecting Packages -> Install and finding the ones you want by typing their names into the bar that pops up. The full list of additional packages needed is:
</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">akima</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">forecast</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">qrnn</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">e1071</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">randomForest</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">monmlp</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">genalg</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">stringr</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">doParallel</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">foreach</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">quantreg</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">quantregGrowth</p>
<p style="padding-top: 7pt;padding-left: 79pt;text-indent: 0pt;line-height: 108%;text-align: left;">matrixStats</p>
</li>
<li data-list-text="5.">
<p style="padding-top: 7pt;padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
The user-selected options in MMPE_RunControlFile.txt are discussed below. However, one of the parameters the user sets there counts as an installation step. It's the number of cores in the computer's CPU, num_cores, which is used for parallelization of the time-intensive cross-validation calculations in the two neural network methods included among the six supervised learning systems in M
<span class="s4">4</span>
. On a Windows PC, you can find the number of cores your processor has by pressing Ctrl + Shift + Esc to open Task Manager; then select the Performance tab to see how many cores (not logical processors!) there are on your machine.
</p>
</li>
</ol>
<p style="padding-top: 7pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Details on how to run M
<span class="s4">4</span>
from within the RStudio environment are given in Section 4.3 below. As noted above, it is of course possible to run M
<span class="s4">4</span>
directly in R as well, which is less user-friendly but can have advantages like avoiding graphics-related crashes that sometimes occur in RStudio. If a crash occurs and cryptic errors are recorded in the log file (see below), a simple hack that can sometimes work is to just resize the plot (lower right) window in RStudio to make it larger and rerun. It also seems to be good
</p>
<p style="padding-top: 3pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">
practice to shut down and restart RStudio after every few M
<span class="s4">4</span>
runs. Some pointers around directory placement and names that were provided above can help too.
</p>
<p style="padding-top: 7pt;padding-left: 8pt;text-indent: 0pt;text-align: left;">
Note that M
<span class="s4">4</span>
has thus far only been tested on a Windows PC.
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="4.2">
<h4 style="padding-left: 43pt;text-indent: -36pt;text-align: left;">File inventory and descriptions</h4>
<ol id="l11">
<li data-list-text="4.2.1">
<p class="s5" style="padding-top: 9pt;padding-left: 44pt;text-indent: -36pt;text-align: left;">
M
<span class="s6">4</span>
code files
</p>
<p style="padding-top: 9pt;padding-left: 7pt;text-indent: 0pt;line-height: 108%;text-align: left;">
The R files that collectively make up M
<span class="s4">4</span>
are as follows. All are invoked in M
<span class="s4">4</span>
runs during the model-build phase; in forecast runs, only the main program and the three ensemble-creation utility functions are used:
</p>
<ul id="l12">
<li data-list-text="">
<p style="padding-top: 7pt;padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
<i>MMPE-Main_MkII.R</i>
The M
<span class="s4">4</span>
main program. Clears workspace, reads in data, declares external functions, tracks errors, creates (in build mode) the six modeling systems, including ensemble creation and other tasks, along with graphics and output files, or runs (in forecast mode) the six existing modeling systems using output files created during the build phase.
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
PCA-Module_MkII.R
<span class="p">Function performing principal component analysis on the predictor variates provided to it; this may be all the predictors in the input data file, MMPEInputData_ModelBuildingMode.txt (described in detail below), if the user specifies in MMPE_RunControlFile.txt (also described in detail below) that genetic algorithm-based feature optimization is not to be used, or a subset of the predictors in that input data file if GA-based feature optimization is used (see Sections 4.2.2 and 4.3 below). PCA is performed on the correlation, not covariance, matrix.</span>
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 43pt;text-indent: -18pt;text-align: left;">
PCA-Graphics-Module_MkII.R
<span class="p">Function creating some PCA-related plots.</span>
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
GA-PredictorSelection-Module_MkII.R
<span class="p">Function performing genetic algorithm-based feature optimization, if that option is selected by the user in the input file, MMPE_RunControlFile.txt; includes some automatic hyperparameter-setting capabilities around the monotone artificial neural network and monotone composite quantile regression neural network algorithms; cost function is root mean square error. Note that the cost is given a penalty if the trial feature set provided by the genetic algorithm in a given iteration includes fewer than MinNumVars of the input predictor variables provided by the user in the building-phase input data file, MMPEInputData_ModelBuildingMode.txt, where MinNumVars is settable by the user in MMPE_RunControlFile.txt but must be &gt; 1 and &lt; the number of predictor variates provided by the user in MMPEInputData_ModelBuildingMode.txt and should in general be left at 2. The effect is to ensure that the genetic algorithm is guided away from cases where only a single predictor variable is retained, which is inconsistent with the canonical use-case for statistical WSF in the western US (generally framed around several multicollinear predictor variables). A single-</span>
</p>
<p style="padding-top: 3pt;padding-left: 43pt;text-indent: 0pt;line-height: 108%;text-align: left;">
predictor application of M
<span class="s4">4</span>
is possible for unusual circumstances but does not use genetic algorithm feature optimization (see use instructions below in Section 4.3).
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
PCR-Module_MkII.R
<span class="p">Function performing linear regression model-fitting along with cross-validated performance evaluation and estimation of both stationary Gaussian, and heteroscedastic non-Gaussian, prediction intervals around the best-fit PCR estimates; the latter uses a method based on a Box-Cox transform on the prediction residuals, including estimation of the best-fit Box-Cox transform parameters.</span>
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
PCSVM-Module_MkII.R
<span class="p">As in PCR-Module_MkII.R but for the support vector machine; additionally includes some AutoML capabilities for identifying optimal SVM hyperparameters in accordance with options set by the user in the input file, MMPE_RunControlFile.txt.</span>
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 43pt;text-indent: -18pt;text-align: left;">
PCRF-Module_MkII.R
<span class="p">As in PCR-Module_MkII.R but for random forests.</span>
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
PCANN-Module_MkII.R
<span class="p">As in PCR-Module_MkII.R but for a monotone artificial neural network; additionally includes setting some hyperparameters in accordance with values specified by the user in MMPE_RunControlFile.txt and/or delivered to it by AutoML algorithms in MMPE-Main_MkII.R and/or GA-PredictorSelection-Module_MkII.R; uses (if option enabled in MMPE_RunControlFile.txt) light parallelization of embarrassingly parallel ANN cross-validation task across processor cores.</span>
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">
PCQR-Module_MkII.R
<span class="p">As in PCR-Module_MkII.R but using quantile regression with enforcement of a quantile non-crossing constraint that can be crucial for small sample sizes; expectation value is taken to be the median; heteroscedastic non-Gaussian prediction intervals around the best-fit QR predictions are estimated using the linear quantile-regression capabilities inherent to the method; due to some quirks with one of the R packages used, while a .Rdata file is produced, the primary information about the trained model is written to a text file instead, unlike the other five modeling systems.</span>
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">
PCMCQRNN_MkII.R
<span class="p">As in PCANN-Module_MkII.R, but for the monotone composite quantile regression neural network; expectation value is taken to be the median; heteroscedastic non-Gaussian prediction intervals around the best-fit predictions are estimated using the nonlinear quantile-regression capabilities inherent to the method.</span>
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="">
<p class="s2" style="padding-left: 44pt;text-indent: -18pt;line-height: 108%;text-align: left;">
InitializeEnsemble-Module_MkII.R, AppendEnsemble-Module_MkII.R, FinalizeEnsemble-Module_MkII.R
<span class="p">Utility functions used in creating multi-model ensemble in accordance with user-specified preferences set in MMPE_RunControlFile.txt input file.</span>
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
</li>
<li data-list-text="">
<p style="padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
<i>Diagnostics-Module_MkII.R</i>
Creates visual diagnostics and statistical measures describing model fit for each of the six constituent modeling systems and the multi-model ensemble mean forecast distribution. Some of these results are written to file, whereas others are graphically depicted in windows that open in R during an M
<span class="s4">4</span>
run.
</p>
</li>
</ul>
<p style="padding-top: 3pt;padding-left: 8pt;text-indent: 0pt;line-height: 108%;text-align: left;">
Note that for standard- or intermediate-level use of M
<span class="s4">4</span>
(see below) no edits are required to any of these files. In other words, it’s strongly suggested you don’t mess with them unless you really know what you’re doing!
</p>
</li>
<li data-list-text="4.2.2">
<p class="s5" style="padding-top: 7pt;padding-left: 44pt;text-indent: -36pt;text-align: left;">Input files</p>
<p style="padding-top: 9pt;padding-left: 8pt;text-indent: 0pt;text-align: left;">
M
<span class="s4">4</span>
has only three input files, and only two of them are used for a given run of the program:
</p>
<ul id="l13">
<li data-list-text="">
<p style="padding-top: 9pt;padding-left: 43pt;text-indent: -18pt;line-height: 108%;text-align: left;">
<i>MMPEInputData_ModelBuildingMode.txt</i>
The input data file for the training phase of M
<span class="s4">4</span>
application. These are the historical data used to fit the M
<span class="s4">4</span>
modeling suite. The file is only required during the build step. Don’t change the name of the file.
</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
<p style="padding-left: 43pt;text-indent: 0pt;line-height: 108%;text-align: left;">
The file contains a (
<i>N</i>
+1) row by (
<i>M</i>
+2) column matrix, where
<i>N</i>
is the number of samples and
<i>M</i>
is the number of predictors. The first column is, in the canonical use-case for western US statistical WSF, the year of the observation but could conceivably be a different sort of numerical sample index; the second column gives the observed flow volume in that year, i.e., the predictand; and subsequent columns give the observed values of various predictor variables in that year, which in the canonical use-case will typically be quantities like SWE, accumulated precipitation, and so forth (see previous sections). The top row is a header, providing information about each of the columns; avoid using ambiguous wildcard characters or spaces in the title for a given column. Columns are tab-separated.
</p>
<p style="padding-top: 7pt;padding-left: 44pt;text-indent: 0pt;line-height: 108%;text-align: left;">
If the genetic algorithm in M
<span class="s4">4</span>
is employed for feature optimization, these predictor variables collectively constitute a pool of candidates, and the GA will pick the ones that work best (see definition of ‘best’ in Section 4.2.1 or the Appendices) for a given model. This is done independently for each of the six constituent modeling systems, as each of the corresponding supervised regression-type models has its own strengths and weaknesses and may work best for a different subset of the candidate predictors. The canonical western US statistical WSF use-case is built around several multicollinear predictors. If feature optimization is used, at least three, and in normal WSF use typically somewhere around a half-dozen to two-dozen, candidate input variables would be provided in this file.
</p>
<p style="padding-top: 7pt;padding-left: 43pt;text-indent: 0pt;line-height: 108%;text-align: left;">
If on the other hand the genetic algorithm is not used in M
<span class="s4">4</span>
, all the input variables provided in this file will be used in all the models. In this case, only one predictor variable is necessary (though for canonical western US statistical WSF applications more should generally be provided; see above).
</p>
<p style="padding-top: 8pt;padding-left: 44pt;text-indent: 0pt;line-height: 108%;text-align: left;">Here’s a complete example of the contents of MMPEInputData_ModelBuildingMode.txt, using data from 1986 through 2015:</p>
<p style="text-indent: 0pt;text-align: left;">
<br>
</p>
<table style="border-collapse:collapse;margin-left:77.3404pt" cellspacing="0">
<tr style="height:10pt">
<td style="width:30pt">
<p class="s8" style="padding-left: 2pt;text-indent: 0pt;line-height: 8pt;text-align: left;">Year</p>
</td>
<td style="width:43pt">
<p class="s8" style="padding-left: 8pt;text-indent: 0pt;line-height: 8pt;text-align: left;">Obs</p>
</td>
<td style="width:29pt">
<p class="s8" style="padding-left: 1pt;text-indent: 0pt;line-height: 8pt;text-align: left;">LM_P</p>
</td>
<td style="width:41pt">
<p class="s8" style="padding-left: 8pt;text-indent: 0pt;line-height: 8pt;text-align: left;">LM_SWE</p>
</td>
<td style="width:33pt">
<p class="s8" style="padding-left: 3pt;padding-right: 4pt;text-indent: 0pt;line-height: 8pt;text-align: center;">Sig_P</p>
</td>
<td style="width:41pt">
<p class="s8" style="padding-left: 6pt;text-indent: 0pt;line-height: 8pt;text-align: left;">Sig_SWE</p>
</td>
<td style="width:31pt">
<p class="s8" style="padding-left: 1pt;text-indent: 0pt;line-height: 8pt;text-align: left;">Sil_P</p>
</td>
<td style="width:42pt">
<p class="s8" style="padding-left: 6pt;text-indent: 0pt;line-height: 8pt;text-align: left;">Sil_SWE</p>
</td>
</tr>
<tr style="height:10pt">
<td style="width:30pt">
<p class="s8" style="padding-left: 2pt;text-indent: 0pt;line-height: 8pt;text-align: left;">1986</p>
</td>
<td style="width:43pt">
<p class="s8" style="padding-left: 8pt;text-indent: 0pt;line-height: 8pt;text-align: left;">33.036</p>
</td>
<td style="width:29pt">
<p class="s8" style="padding-left: 1pt;text-indent: 0pt;line-height: 8pt;text-align: left;">9.3</p>
</td>
<td style="width:41pt">
<p class="s8" style="padding-left: 8pt;text-indent: 0pt;line-height: 8pt;text-align: left;">0</p>
</td>
<td style="width:33pt">
<p class="s8" style="padding-left: 3pt;padding-right: 9pt;text-indent: 0pt;line-height: 8pt;text-align: center;">13.8</p>
</td>
<td style="width:41pt">
<p class="s8" style="padding-left: 6pt;text-indent: 0pt;line-height: 8pt;text-align: left;">1</p>
</td>
<td style="width:31pt">
<p class="s8" style="padding-left: 1pt;text-indent: 0pt;line-height: 8pt;text-align: left;">16.6</p>
</td>
<td style="width:42pt">
<p class="s8" style="padding-left: 6pt;text-indent: 0pt;line-height: 8pt;text-align: left;">8.3</p>
</td>
</tr>
<tr style="height:10pt">
<td style="width:30pt">
<p class="s8" style="padding-left: 2pt;text-indent: 0pt;line-height: 8pt;text-align: left;">1987</p>
</td>
<td style="width:43pt">
<p class="s8" style="padding-left: 8pt;text-indent: 0pt;line-height: 8pt;text-align: left;">50.664</p>
</td>
<td style="width:29pt">
<p class="s8" style="padding-left: 1pt;text-indent: 0pt;line-height: 8pt;text-align: left;">11.2</p>
</td>
<td style="width:41pt">
<p class="s8" style="padding-left: 8pt;text-indent: 0pt;line-height: 8pt;text-align: left;">5.1</p>
</td>
<td style="width:33pt">
<p class="s8" style="padding-left: 3pt;padding-right: 9pt;text-indent: 0pt;line-height: 8pt;text-align: center;">15.8</p>
</td>
<td style="width:41pt">
<p class="s8" style="padding-left: 6pt;text-indent: 0pt;line-height: 8pt;text-align: left;">6.9</p>
</td>
<td style="width:31pt">
<p class="s8" style="padding-left: 1pt;text-indent: 0pt;line-height: 8pt;text-align: left;">19.8</p>
</td>
<td style="width:42pt">
<p class="s8" style="padding-left: 6pt;text-indent: 0pt;line-height: 8pt;text-align: left;">10.5</p>
</td>
</tr>
<tr style="height:10pt">
<td style="width:30pt">
<p class="s8" style="padding-left: 2pt;text-indent: 0pt;line-height: 8pt;text-align: left;">1988</p>
</td>
<td style="width:43pt">
<p class="s8" style="padding-left: 8pt;text-indent: 0pt;line-height: 8pt;text-align: left;">34.106</p>
</td>
<td style="width:29pt">