% This file is part of the MMIXware package (c) Donald E Knuth 1999
@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
\def\title{MMIXAL}
\def\MMIX{\.{MMIX}}
\def\MMIXAL{\.{MMIXAL}}
\def\Hex#1{\hbox{$^{\scriptscriptstyle\#}$\tt#1}} % experimental hex constant
\def\<#1>{\hbox{$\langle\,$#1$\,\rangle$}}\let\is=\longrightarrow
\def\bull{\smallbreak\textindent{$\bullet$}}
@s and normal @q unreserve a C++ keyword @>
@s or normal @q unreserve a C++ keyword @>
@s xor normal @q unreserve a C++ keyword @>
\ifx\exotic+
\font\heb=heb8 at 10pt
\font\rus=lhwnr8
\input unicode
\unicodeptsize=8pt
\fi
@* Definition of MMIXAL. This program takes input written in \MMIXAL,
the \MMIX\ assembly language, and translates it
@^assembly language@>
into binary files that can be loaded and executed
on \MMIX\ simulators. \MMIXAL\ is much simpler than the ``industrial
strength'' assembly languages that computer manufacturers usually provide,
because it is primarily intended for the simple demonstration programs
in {\sl The Art of Computer Programming}. Yet it tries to have enough
features to serve also as the back end of compilers for \CEE/ and other
high-level languages.
Instructions for using the program appear at the end of this document.
First we will discuss the input and output languages in detail; then we'll
consider the translation process, step by step; then we'll put everything
together.
@ A program in \MMIXAL\ consists of a series of {\it lines}, each of which
usually contains a single instruction. However, lines with no instructions are
possible, and so are lines with two or more instructions.
Each instruction has
three parts called its label field, opcode field, and operand field; these
fields are separated from each other by one or more spaces.
The label field, which is often empty, consists of all characters up to the
first blank space. The opcode field, which is never empty, runs from the first
nonblank after the label to the next blank space. The operand field, which
again might be empty, runs from the next nonblank character (if any) to the
first blank or semicolon that isn't part of a string or character constant.
If the operand field is followed by a semicolon, possibly with intervening
blanks, a new instruction begins immediately after the semicolon; otherwise
the rest of the line is ignored. The end of a line is treated as a blank space
for the purposes of these rules, with the additional proviso that
string or character constants are not allowed to extend from one line to
another.
The label field must begin with a letter or a digit; otherwise the entire
line is treated as a comment. Popular ways to introduce comments,
either at the beginning of a line or after the operand field, are to
precede them by the character \.\% as in \TeX, or by \.{//} as in \CPLUSPLUS/;
\MMIXAL\ is not very particular. However, Lisp-style comments introduced
by single semicolons will fail if they follow an instruction, because
they will be assumed to introduce another instruction.
@ \MMIXAL\ has no built-in macro capability, nor does it know how to
include header files and such things. But users can run their files
through a standard \CEE/ preprocessor to obtain \MMIXAL\ programs in which
macros and such things have been expanded. (Caution: The preprocessor also
removes \CEE/-style comments, unless it is told not to do so.)
Literate programming tools could also be used for preprocessing.
@^C preprocessor@>
@^literate programming@>
If a line begins with the special form `\.\# \<integer> \<string>',
this program interprets it as a {\it line directive\/} emitted by a
preprocessor. For example,
$$\leftline{\indent\.{\# 13 "foo.mms"}}$$
means that the following line was line 13 in the user's source file
\.{foo.mms}. Line directives allow us to correlate errors with the
user's original file; we also pass them to the output, for use by
simulators and debuggers.
@^line directives@>
@ \MMIXAL\ deals primarily with {\it symbols\/} and {\it constants}, which it
interprets and combines to form machine language instructions and data.
Constants are simplest, so we will discuss them first.
A {\it decimal constant\/} is a sequence of digits, representing a number in
radix~10. A~{\it hexadecimal constant\/} is a sequence of hexadecimal digits,
preceded by~\.\#, representing a number in radix~16:
$$\vbox{\halign{$#$\hfil\cr
\<digit>\is\.0\mid\.1\mid\.2\mid\.3\mid\.4\mid
\.5\mid\.6\mid\.7\mid\.8\mid\.9\cr
\<hex digit>\is\<digit>\mid\.A\mid\.B\mid\.C\mid\.D\mid\.E\mid\.F\mid
\.a\mid\.b\mid\.c\mid\.d\mid\.e\mid\.f\cr
\<decimal constant>\is\<digit>\mid\<decimal constant>\<digit>\cr
\<hex constant>\is\.\#\<hex digit>\mid\<hex constant>\<hex digit>\cr
}}$$
Constants whose value is $2^{64}$ or more are reduced modulo $2^{64}$.
@ A {\it character constant\/} is a single character enclosed in
single quote marks; it denotes the {\mc ASCII} or Unicode number
@^Unicode@>
corresponding to that character. For example, \.{'a'}
represents the constant \.{\#61}, also known as~\.{97}. The quoted character
can be
anything except the character that the \CEE/ library calls \.{\\n} or {\it
newline}; that character should be represented as \.{\#a}.
$$\vbox{\halign{$#$\hfil\cr
\<character constant>\is\.'\<single byte character except newline>\.'\cr
\<constant>\is\<decimal constant>\mid\<hex constant>\mid\<character constant>
\cr}}$$
Notice that \.{'''} represents a single quote, the code \.{\#27}; and
\.{'\\'} represents a backslash, the code \.{\#5c}. \MMIXAL~characters are
never ``quoted'' by backslashes as in the \CEE/~language.
In the present implementation
a character constant will always be at most 255, since wyde character
input is not supported.
\ifx\exotic+ But if the input were in Unicode one could write,
say, \.'{\heb\char"40}\.' or \.'{\rus ZH}\.' for \.{\#05d0} or
\.{\#0416}. \fi
The present program
does not support Unicode directly because basic software for inputting and
outputting 16-bit characters was still in a primitive state at the time of
writing. But the data structures below are designed so that a change to
Unicode will not be difficult when the time is ripe.
@ A {\it string constant\/} like \.{"Hello"} is an abbreviation for
a sequence of one or more character constants separated by commas:
\.{'H','e','l','l','o'}.
Any character except newline or the double quote mark~\."
can appear between the double quotes of a string constant.
\ifx\exotic+ Similarly,
\."\Uni1.08:24:24:-1:20% Unicode char "9ad8
<002000001800000806ffffff00000002004003ffe00300e00300c00300c003ffc0%
0300c02000043ffffe30000e31008c31ffcc3181cc31818c31818c31ff8c31818c3%
0007c300018>%
\thinspace\Uni1.08:24:24:-1:20% Unicode char "5fb7
<1c038018030018030631ffff30060067860446fffe86ccce0ccccc0ccccc18cccc%
18fffc38c00c38001878fffc58040098030818398618b18318b00b19b0081b300c1%
b3ffc181ff8>%
\thinspace\Uni1.08:24:24:-1:20% Unicode char "7eb3
<0601c00e01800c018018018018218231bfff61b187433186ff3186c631860c3186%
18334630332663b6367e341660380600300600300603b0061e3006f03006c030060%
0303e00300c>%
\kern.1em\." is an abbreviation for
\.'\Uni1.08:24:24:-1:20% Unicode char "9ad8
<002000001800000806ffffff00000002004003ffe00300e00300c00300c003ffc0%
0300c02000043ffffe30000e31008c31ffcc3181cc31818c31818c31ff8c31818c3%
0007c300018>%
\.{','}\Uni1.08:24:24:-1:20% Unicode char "5fb7
<1c038018030018030631ffff30060067860446fffe86ccce0ccccc0ccccc18cccc%
18fffc38c00c38001878fffc58040098030818398618b18318b00b19b0081b300c1%
b3ffc181ff8>%
\.{','}\Uni1.08:24:24:-1:20% Unicode char "7eb3
<0601c00e01800c018018018018218231bfff61b187433186ff3186c631860c3186%
18334630332663b6367e341660380600300600300603b0061e3006f03006c030060%
0303e00300c>%
\.' (namely \.{\#9ad8,\#5fb7,\#7eb3}) when Unicode is supported.
@^Unicode@>
\fi
@ A {\it symbol\/} in \MMIXAL\ is any sequence of letters and digits,
beginning with a letter. A~colon~`\.:' or underscore symbol `\.\_'
is regarded as a letter, for purposes of this definition.
All extended-ASCII characters like `{\tt \'e}',
whose 8-bit code exceeds 126, are also treated as letters.
$$\vbox{\halign{$#$\hfil\cr
\<letter>\is\.A\mid\.B\mid\cdots\mid\.Z\mid\.a\mid\.b\mid\cdots\mid\.z\mid
\.:\mid\.\_\mid\<{character with code value $>126$}>\cr
\<symbol>\is\<letter>\mid\<symbol>\<letter>\mid\<symbol>\<digit>\cr
}}$$
In future implementations, when \MMIXAL\ is used with Unicode,
@^Unicode@>
all wyde characters whose 16-bit code exceeds 126 will be regarded
as letters; thus \MMIXAL\ symbols will be able to involve Greek letters or
Chinese characters or thousands of other glyphs.
@ A symbol is said to
be {\it fully qualified\/} if it begins with a colon. Every symbol
that is not fully qualified is an abbreviation for the fully qualified
symbol obtained by placing the {\it current prefix\/} in front of it;
the current prefix is always fully qualified. At the beginning of an
\MMIXAL\ program the current prefix is simply the single character~`\.:',
but the user can change it with the \.{PREFIX} command. For example,
$$\vbox{\halign{&\quad\tt#\hfil\cr
ADD&x,y,z&\% means ADD :x,:y,:z\cr
PREFIX&Foo:&\% current prefix is :Foo:\cr
ADD&x,y,z&\% means ADD :Foo:x,:Foo:y,:Foo:z\cr
PREFIX&Bar:&\% current prefix is :Foo:Bar:\cr
ADD&:x,y,:z&\% means ADD :x,:Foo:Bar:y,:z\cr
PREFIX&:&\% current prefix reverts to :\cr
ADD&x,Foo:Bar:y,Foo:z&\% means ADD :x,:Foo:Bar:y,:Foo:z\cr
}}$$
This mechanism allows large programs to avoid conflicts between symbol names,
when parts of the program are independent and/or written by different users.
The current prefix conventionally ends with a colon, but this convention
need not be obeyed.
@ A {\it local symbol\/} is a decimal digit followed by one of the
letters \.B, \.F, or~\.H, meaning ``backward,'' ``forward,'' or ``here'':
$$\vbox{\halign{$#$\hfill\cr
\<local operand>\is\<digit>\,\.B\mid\<digit>\,\.F\cr
\<local label>\is\<digit>\,\.H\cr
}}$$
The \.B and \.F forms are permitted only in the operand field of \MMIXAL\
instructions; the \.H form is permitted only in the label field. A local
operand such as~\.{2B} stands for the last local label~\.{2H}
in instructions before the current one, or 0 if \.{2H} has not yet appeared
as a label. A~local operand such as~\.{2F} stands
for the first \.{2H} in instructions after the current one. Thus, in a
sequence such as
$$\vbox{\halign{\tt#\cr 2H JMP 2F\cr 2H JMP 2B\cr}}$$
the first instruction jumps to the second and the second jumps to the first.
Local symbols are useful for references to nearby points of a program, in
cases where no meaningful name is appropriate. They can also be useful
in special situations where a redefinable symbol is needed; for example,
an instruction like
$$\.{9H IS 9B+1}$$
will maintain a running counter.
@ Each symbol receives a value called its {\it equivalent\/} when it
appears in the label field of an instruction; it is said to be {\it defined\/}
after its equivalent has been established. A few symbols, like \.{rA}
and \.{ROUND\_OFF} and \.{Fopen},
are predefined because they refer to fixed constants
associated with the \MMIX\ hardware or its rudimentary operating system;
otherwise every symbol should be
defined exactly once. The two appearances of `\.{2H}' in the example
above do not violate this rule, because the second `\.{2H}' is not the
same symbol as the first.
A predefined symbol can be redefined (given a new equivalent). After it
has been redefined it acts like an ordinary symbol and cannot be
redefined again. A complete list of the predefined symbols appears
in the program listing below.
@^predefined symbols@>
Equivalents are either {\it pure\/} or {\it register numbers}. A pure
equivalent is an unsigned octabyte, but a register number
equivalent is a one-byte value, between 0 and~255.
A dollar sign is used to change a pure number into a register number;
for example, `\.{\$20}' means register number~20.
@ Constants and symbols are combined into {\it expressions\/} in a simple way:
$$\vbox{\halign{$#$\hfil\cr
\<primary expression>\is\<constant>\mid\<symbol>\mid\<local operand>\mid
\.{@@}\mid\cr
\hskip12pc\.(\<expression>\.)\mid\<unary operator>\<primary expression>\cr
\<term>\is\<primary expression>\mid
\<term>\<strong operator>\<primary expression>\cr
\<expression>\is\<term>\mid\<expression>\<weak operator>\<term>\cr
\<unary operator>\is\.+\mid\.-\mid\.\~\mid\.\$\mid\.\&\cr
\<strong operator>\is\.*\mid\./\mid\.{//}\mid\.\%\mid\.{<<}\mid\.{>>}
\mid\.\&\cr
\<weak operator>\is\.+\mid\.-\mid\.{\char'174}\mid\.\^\cr
}}$$
Each expression has a value that is either pure or a register number.
The character \.{@@} stands for the current location, which is always pure.
The unary operators
\.+, \.-, \.\~, \.\$, and \.\& mean, respectively, ``do nothing,''
``subtract from zero,'' ``complement the bits,'' ``change from pure value
to register number,'' and ``take the serial number.'' Only the first of these,
\.+, can be applied to a register number. The last unary operator, \.\&,
applies only to symbols, and it is of interest primarily to system programmers;
it converts a symbol to the unique positive integer that is used to identify
it in the binary file output by \MMIXAL.
@^serial number@>
Binary operators come in two flavors, strong and weak. The strong ones
are essentially concerned with multiplication or division: \.{x*y},
\.{x/y}, \.{x//y}, \.{x\%y}, \.{x<<y}, \.{x>>y}, and \.{x\&y}
stand respectively for
$(x\times y)\bmod2^{64}$ (multiplication), $\lfloor x/y\rfloor$ (division),
$\lfloor2^{64}x/y\rfloor$ (fractional division), $x\bmod y$ (remainder),
$(x\times2^y)\bmod2^{64}$ (left~shift), $\lfloor x/2^y\rfloor$
(right shift), and $x\mathbin{\char`\&}y$ (bitwise and) on unsigned octabytes.
Division is legal only if $y>0$; fractional division is
legal only if $x<y$. None of the strong binary operations can be
applied to register numbers.
The weak binary operations \.{x+y}, \.{x-y}, \.{x\char'174 y}, and
\.{x\^y} stand respectively for $(x+y)\bmod2^{64}$ (addition),
$(x-y)\bmod2^{64}$ (subtraction), $x\mathbin{\mkern1mu\vert\mkern1mu}y$
(bitwise or), and $x\oplus y$ (bitwise exclusive-or) on
unsigned octabytes. These operations can be applied to register
numbers only in four contexts: $\<register>+\<pure>$, $\<pure>+\<register>$,
$\<register>-\<pure>$
and $\<register>-\<register>$. For example, if \.{x} denotes \.{\$1} and
\.{y} denotes \.{\$10}, then \.{x+3} and \.{3+x} denote \.{\$4}, and
\.{y-x} denotes the pure value \.{9}.
Register numbers within expressions are allowed to be
arbitrary octabytes, but a register number assigned as the
equivalent of a symbol should not exceed 255.
(Incidentally, one might ask why the designer of \MMIXAL\ did not simply
adopt the existing rules of \CEE/ for expressions. The primary reason is that
the designers of \CEE/ chose to give \.{<<}, \.{>>}, and \.\& a lower
precedence than~\.+; but in \MMIXAL\ we want to be able to write things
like \.{o<<24+x<<16+y<<8+z} or \.{@@+yz<<2} or \.{@@+(\#100-@@)\&\#ff}.
Since the conventions of \CEE/ were inappropriate, it was better
to make a clean break, not pretending to have a close relationship
with that language. The new rules are quite easily memorized,
because \MMIXAL\ has just two levels of precedence, and the strong binary
operations are all essentially multiplicative by nature
while the weak binary operations are essentially additive.)
@ A symbol is called a {\it future reference\/} until it has been defined.
\MMIXAL\ restricts the use of future references, so that programs can
be assembled quickly in one pass over the input; therefore all
expressions can be evaluated when the \MMIXAL\ processor first sees them.
The restrictions are easily stated: Future references
cannot be used in expressions together with unary or binary operators (except
the unary~\.+, which does nothing); moreover, future references
can appear as operands only in instructions that have relative
addresses (namely branches, probable branches, \.{JMP}, \.{PUSHJ},
\.{GETA}) or in octabyte constants (the pseudo-operation \.{OCTA}).
Thus, for example, one can say \.{JMP}~\.{1F} or \.{JMP}~\.{1B-4}, but not
\.{JMP}~\.{1F-4}.
@ We noted earlier that each \MMIXAL\ instruction contains
a label field, an opcode field, and an operand field. The label field is
either empty or a symbol or local label; when it is nonempty, the
symbol or local label receives an equivalent. The operand field is
either empty or a sequence of expressions separated by commas; when
it is empty, it is equivalent to the simple operand field~`\.0'.
$$\vbox{\halign{$#$\hfil\cr
\<instruction>\is\<label>\<opcode>\<operand list>\cr
\<label>\is\<empty>\mid\<symbol>\mid\<local label>\cr
\<operand list>\is\<empty>\mid\<expression list>\cr
\<expression list>\is\<expression>\mid\<expression list>\.,\<expression>\cr
}}$$
The opcode field contains either a symbolic \MMIX\ operation name (like
\.{ADD}), or an {\it alias operation}, or a {\it pseudo-operation}.
Alias operations are alternate names for \MMIX\ operations whose standard
names are inappropriate in certain contexts.
Pseudo-operations do not correspond
directly to \MMIX\ commands, but they govern the assembly process in
important ways.
There are two alias operations:
\bull \.{SET} \.{\$X,\$Y} is equivalent to \.{OR} \.{\$X,\$Y,0}; it sets
register~X to register~Y. Similarly, \.{SET} \.{\$X,Y} (when \.Y is
not a register) is equivalent to \.{SETL} \.{\$X,Y}.
@.SET@>
\bull \.{LDA} \.{\$X,\$Y,\$Z} is equivalent to \.{ADDU} \.{\$X,\$Y,\$Z};
it loads the address of memory location $\rm \$Y+\$Z$ into register~X.
Similarly, \.{LDA} \.{\$X,\$Y,Z} is equivalent to \.{ADDU} \.{\$X,\$Y,Z}.
@.LDA@>
\smallskip
The symbolic operation names for genuine \MMIX\ operations
should not include the suffix~\.I for an immediate operation or the suffix~\.B
for a backward jump; \MMIXAL\ determines such things automatically.
Thus, one never writes \.{ADDI} or \.{JMPB} in the source input to
\MMIXAL, although such opcodes might appear when a simulator or
debugger or disassembler is presenting a numeric instruction in symbolic form.
$$\vbox{\halign{$#$\hfil\cr
\<opcode>\is\<symbolic \MMIX\ operation>\mid\<alias operation>\cr
\hskip12pc\mid\<pseudo-operation>\cr
\<symbolic \MMIX\ operation>\is\.{TRAP}\mid\.{FCMP}\mid\cdots\mid\.{TRIP}\cr
\<alias operation>\is\.{SET}\mid\.{LDA}\cr
\<pseudo-operation>\is\.{IS}\mid\.{LOC}\mid\.{PREFIX}\mid
\.{GREG}\mid\.{LOCAL}\mid\.{BSPEC}\mid\.{ESPEC}\cr
\hskip12pc\mid\.{BYTE}\mid\.{WYDE}\mid\.{TETRA}\mid\.{OCTA}\cr
}}$$
@ \MMIX\ operations like \.{ADD} require exactly three expressions as
operands. The first two must be register numbers. The third must be either a
register number or a pure number between 0 and~255; in the latter case,
\.{ADD} becomes \.{ADDI} in the assembled output. Thus, for example,
the command ``set register~1 to the sum of register~2 and register~3'' could be
expressed as
$$\.{ADD \$1,\$2,\$3}$$
or as, say,
$$\.{ADD x,y,y+1}$$
if the equivalent of \.x is \.{\$1} and the equivalent of \.y is \.{\$2}.
The command ``subtract 5 from register~1'' could be expressed as
$$\.{SUB \$1,\$1,5}$$
or as
$$\.{SUB x,x,5}$$
but not as `\.{SUBI} \.{\$1,\$1,5}' or `\.{SUBI} \.{x,x,5}'.
\MMIX\ operations like \.{FLOT} require either three operands
(register, pure, register/pure) or only two (register, register/pure).
In the first case the middle operand is the rounding mode, which is
best expressed in terms of the predefined symbolic values
\.{ROUND\_CURRENT}, \.{ROUND\_OFF}, \.{ROUND\_UP}, \.{ROUND\_DOWN},
\.{ROUND\_NEAR}, for $(0,1,2,3,4)$ respectively. In the second case
the middle operand is understood to be zero (namely,
\.{ROUND\_CURRENT}).
@:ROUND_OFF}\.{ROUND\_OFF@>
@:ROUND_UP}\.{ROUND\_UP@>
@:ROUND_DOWN}\.{ROUND\_DOWN@>
@:ROUND_NEAR}\.{ROUND\_NEAR@>
@:ROUND_CURRENT}\.{ROUND\_CURRENT@>
\MMIX\ operations like \.{SETL} or \.{INCH}, which involve a wyde
immediate constant, require exactly two operands (register, pure).
The value of the second operand should fit in two bytes.
\MMIX\ operations like \.{BNZ}, which mention a register and a
relative address, also require two operands. The first operand
should be a register number. The second operand should yield a result~$r$
in the range $-2^{16}\le r<2^{16}$ when the current location is subtracted
from it and the result is divided by~4. The second operand might also
be undefined; in that case, the eventual value must satisfy the
restriction stated for defined values. The opcodes \.{GETA} and
\.{PUSHJ} are similar, except that the first operand to \.{PUSHJ}
might also be pure (see below). The \.{JMP} operation is also
similar, but it has only one operand, and it allows the larger
address range $-2^{24}\le r<2^{24}$.
\MMIX\ operations that refer to memory, like \.{LDO} and \.{STHT} and \.{GO},
are treated like \.{ADD}
if they have three operands, except that the first operand should be
pure (not a register number) in the case of \.{PRELD}, \.{PREGO},
\.{PREST}, \.{STCO}, \.{SYNCD}, and \.{SYNCID}. These opcodes
also accept a special two-operand form, in which the second operand
stands for a {\it base address\/} and an immediate offset (see below).
The first operand of \.{PUSHJ} and \.{PUSHGO} can be either a pure
number or a register number. In the first case (`\.{PUSHJ}~\.{2,Sub}'
or `\.{PUSHGO}~\.{2,Sub}')
the programmer might be thinking ``let's push down two registers'';
in the second case (`\.{PUSHJ}~\.{\$2,Sub}' or `\.{PUSHGO}~\.{\$2,Sub}')
the programmer might be thinking ``let's make register~2 the hole
position for this subroutine call.'' Both cases result in the same
assembled output.
The remaining \MMIX\ opcodes are idiosyncratic:
$$\def\\{{\rm\quad or\quad}}
\vbox{\halign{\tt#\hfill\cr
NEG r,p,z;\cr
PUT s,z;\cr
GET r,s;\cr
POP p,yz;\cr
RESUME xyz;\cr
SAVE r,0;\cr
UNSAVE r;\cr
SYNC xyz;\cr
TRAP x,y,z\\TRAP x,yz\\TRAP xyz;\cr
}}$$
\.{SWYM} and \.{TRIP} are like \.{TRAP}. Here \.s is an integer
between 0 and~31, preferably given by one of the predefined
symbols \.{rA}, \.{rB}, \dots~for special register codes;
\.r is a register number; \.p is a pure byte; \.x, \.y, and \.z are
either register numbers or pure bytes; \.{yz} and \.{xyz} are pure
values that fit respectively in two and three bytes.
All of these rules can be summarized by saying that \MMIXAL\ treats each
\MMIX\ opcode in the most natural way. When there are three operands,
they affect fields X,~Y, and~Z of the assembled \MMIX\ instruction;
when there are two operands, they affect fields X and~YZ;
when there is just one operand, it affects field XYZ.
@ In all cases when the opcode corresponds to an \MMIX\ operation,
the \MMIXAL\ instruction tells the assembler to carry out four steps:
(1)~Align the current location
so that it is a multiple of~4, by adding 1, 2, or~3 if necessary;
(2)~Define the equivalent of the label field to be the
current location, if the label is nonempty;
(3)~Evaluate the operands and assemble the specified \MMIX\ instruction into
the current location;
(4)~Increase the current location by~4.
@ Now let's consider the pseudo-operations, starting with the simplest cases.
\bull\<label> \.{IS} \<expression>
defines the value of the label to be the value of the expression,
which must not be a future reference. The expression may be
either pure or a register number.
@.IS@>
\bull\<label> \.{LOC} \<expression>
first defines the label to be the value of the current location, if the label
is nonempty. Then the current location is changed to the value of the
expression, which must be pure.
@.LOC@>
\smallskip For example, `\.{LOC} \.{\#1000}' will start assembling subsequent
instructions or data in the location whose hexa\-decimal value is \Hex{1000}.
`\.X~\.{LOC}~\.{@@+500}' defines \.X to be the address of the first
of 500 bytes in memory; assembly will continue at location $\.X+500$.
The operation of aligning the current location to a multiple of~256,
if it is not already aligned in that way, can be expressed as
`\.{LOC}~\.{@@+(256-@@)\&255}'.
A less trivial example arises if we want to emit instructions and data into
two separate areas of memory, but we want to intermix them in the
\MMIXAL\ source file. We could start by defining \.{8H} and \.{9H}
to be the starting addresses of the instruction and data segments,
respectively. Then, a sequence of instructions could be enclosed
in `\.{LOC}~\.{8B}; \dots; \.{8H}~\.{IS}~\.{@@}'; a sequence of
data could be enclosed in `\.{LOC}~\.{9B}; \dots; \.{9H}~\.{IS}~\.{@@}'.
Any number of such sequences could then be combined.
Instead of the two pseudo-instructions `\.{8H}~\.{IS}~\.{@@;} \.{LOC}~\.{9B}'
one could in fact write simply `\.{8H}~\.{LOC}~\.{9B}' when
switching from instructions to data.
\bull \.{PREFIX} \<symbol>
redefines the current prefix to be the given symbol (fully qualified).
The label field should be blank.
@.PREFIX@>
@ The next pseudo-operations assemble bytes, wydes, tetrabytes, or
octabytes of data.
\bull \<label> \.{BYTE} \<expression list>
defines the label to be the current location, if the label field is nonempty;
then it assembles one byte for each expression in the expression list, and
advances the current location by the number of bytes. The expressions
should all be pure numbers that fit in one byte.
String constants are often used in such expression lists.
For example, if the current location is \Hex{1000}, the instruction
\.{BYTE}~\.{"Hello",0} assembles six bytes containing the constants
\.{'H'}, \.{'e'}, \.{'l'}, \.{'l'}, \.{'o'}, and~\.0 into locations
\Hex{1000}, \dots,~\Hex{1005}, and advances the current location
to \Hex{1006}.
@.BYTE@>
\bull \<label> \.{WYDE} \<expression list>
is similar, but it first makes the current location even, by adding~1 to it
if necessary. Then it defines the label (if a nonempty label is present),
and assembles each expression as a two-byte value. The current location
is advanced by twice the number of expressions in the list. The
expressions should all be pure numbers that fit in two bytes.
@.WYDE@>
\bull \<label> \.{TETRA} \<expression list>
is similar, but it aligns the current location to a multiple of~4
before defining the label; then it
assembles each expression as a four-byte value. The current location
is advanced by $4n$ if there are $n$~expressions in the list. Each
expression should be a pure number that fits in four bytes.
@.TETRA@>
\bull \<label> \.{OCTA} \<expression list>
is similar, but it first aligns the current location to a multiple of~8;
it assembles each expression as an eight-byte value. The current location
is advanced by $8n$ if there are $n$~expressions in the list. Any or all
of the expressions may be future references, but they should all
be defined as pure numbers eventually.
@.OCTA@>
@ Global registers are important for accessing memory in \MMIX\ programs.
They could be allocated by hand, and defined with \.{IS} instructions,
but \MMIXAL\ provides a mechanism that is usually much more convenient:
\bull \<label> \.{GREG} \<expression>
allocates a new global register, and assigns its number as the
equivalent of the label.
At the beginning of assembly, the current global threshold~G is~\$255.
Each distinct \.{GREG} instruction decreases~G by~1; the final value of~G will
be the initial value of~rG when the assembled program is loaded.
@.GREG@>
The value of the expression will be loaded into the global register
at the beginning of the program. {\it If this value is nonzero, it
should remain constant throughout the program execution\/}; such
global registers are considered to be {\it base addresses}. Two or
more base addresses with the same constant value are assigned to the
same global register number.
Base addresses can simplify memory accesses in an important way.
Suppose, for example, five octabyte values appear in a data segment,
and their addresses are called \.{AA}, \.{BB}, \.{CC}, \.{DD}, and
\.{EE}:
$$\.{AA LOC @@+8;BB LOC @@+8;CC LOC @@+8;DD LOC @@+8;EE LOC @@+8}$$
Then if you say \.{Base GREG AA}, you will be able to write simply
`\.{LDO}~\.{\$1,AA}' to bring the octabyte at \.{AA} into register~\.{\$1}, and
`\.{LDO}~\.{\$2,CC}' to bring the octabyte at \.{CC} into register~\.{\$2}.
Here's how it works: Whenever a memory operation such as
\.{LDO} or \.{STB} or \.{GO} has only two operands, the second
operand should be a pure number whose value can be expressed
as $b+\delta$, where $0\le\delta<256$ and $b$ is the value of
a base address in one of the preceding \.{GREG} commands. The \MMIXAL\
processor will find the closest base address and manufacture an
appropriate command. For example, the instruction `\.{LDO}~\.{\$2,CC}' in the
example of the preceding paragraph would be converted automatically to
`\.{LDO}~\.{\$2,Base,16}'.
If no base address is close enough, an error message will be
generated, unless this program is run with the \.{-x} option
on the command line. The \.{-x} option inserts additional instructions
if necessary, using global register~255, so that any address is
accessible. For example,
if there is no base address that allows \.{LDO}~\.{\$2,FF} to be
implemented in a single instruction, but if \.{FF} equals \.{Base+1000},
then the \.{-x} option would assemble two instructions,
$$\.{SETL \$255,1000; LDO \$2,Base,\$255}$$
in place of \.{LDO}~\.{\$2,FF}. Caution:~The \.{-x} feature makes the
number of actual \MMIX\ instructions hard to predict, so extreme care must
be used if your style of coding includes relative branch instructions
in dangerous forms like `\.{BNZ}~\.{x,@@+8}'.
This base address convention can be used also with the alias
operation~\.{LDA}. For example, `\.{LDA}~\.{\$3,CC}' loads the
@.LDA@>
address of \.{CC} into register~3, by assembling the instruction
`\.{ADDU}~\.{\$3,Base,16}'.
\MMIXAL\ also allows a two-operand form for memory operations such as
$$\hbox{\.{LDO} \.{\$1,\$2}}$$
to be an abbreviation for `\.{LDO} \.{\$1,\$2,0}'.
When \MMIXAL\ programs use subroutines with a memory stack in addition
to the built-in register stack, they usually begin with the
instructions `\.{sp}~\.{GREG}~\.{0;fp}~\.{GREG}~\.0'; these instructions
allocate a {\it stack pointer\/} \.{sp=\$254} and a {\it frame pointer\/}
\.{fp=\$253}. However, subroutine libraries are free to implement any
conventions for global registers and stacks that they like.
@^stack pointer@>
@^frame pointer@>
@ Short programs rarely run out of global registers, but long programs
need a mechanism to check that \.{GREG} hasn't been used too often.
The following pseudo-instruction provides the necessary safety valve:
\bull \.{LOCAL} \<expression>
ensures that the expression will be a local register in the program
being assembled. The expression should be a register number, and
the label field should be blank. At the close of
assembly, \MMIXAL\ will report an error if the final value of~G does
not exceed all register numbers that are declared local in this way.
@.LOCAL@>
A \.{LOCAL} instruction need not be given unless the register number
is 32 or~more. (\MMIX\ always considers \.{\$0} through \.{\$31} to be
local, so \MMIXAL\ implicitly acts as if the
instruction `\.{LOCAL}~\.{\$31}' were present.)
@ Finally, there are two pseudo-instructions to pass information
and hints to the loading routine and/or to debuggers that will be
using the assembled program.
\bull \.{BSPEC} \<expression>
begins ``special mode''; the \<expression> should have a value that
fits in two bytes, and the label field should be blank.
@.BSPEC@>
\bull \.{ESPEC}
ends ``special mode''; the operand field is ignored, and the label
field should be blank.
@.ESPEC@>
\smallskip\noindent
All material assembled between \.{BSPEC} and \.{ESPEC} is passed
directly to the output, but not loaded as part of the assembled program.
Ordinary \MMIX\ instructions cannot appear in special mode; only the
pseudo-operations \.{IS}, \.{PREFIX}, \.{BYTE}, \.{WYDE}, \.{TETRA},
\.{OCTA}, \.{GREG}, and \.{LOCAL} are allowed. The operand of
\.{BSPEC} should have a value that fits in two bytes; this value
identifies the kind of data that follows. (For example, \.{BSPEC}~\.0
might introduce information about subroutine calling conventions at the
current location, and \.{BSPEC}~\.1 might introduce line numbers from
a high-level-language program that was compiled into the code at
the current place.
System routines often need to pass such information through an assembler
to the operating system, hence \MMIXAL\ provides a general-purpose conduit.)
@ A program should begin at the special symbolic location \.{Main}
@.Main@>
(more precisely, at the address corresponding to
the fully qualified symbol \.{:Main}).
This symbol always has serial number~1, and it must always be defined.
@^serial number@>
Locations should not receive assembled data more than once.
(More precisely, the loader will load the bitwise~xor of all the
data assembled for each byte position; but the general rule ``do not load
two things into the same byte'' is safest.)
All locations that do not receive assembled data are initially zero,
except that the loading routine will put register stack data into
segment~3, and the operating system may put command line data and
debugger data into segment~2.
(The rudimentary \MMIX\ operating system starts a program
with the number of command line arguments in~\$0, and a pointer to
the beginning of an array of argument pointers in~\$1.)
Segments 2 and 3 should not get assembled data, unless the
user is a true hacker who is willing to take the risk that such data
might crash the system.
@* Binary MMO output. When the \MMIXAL\ processor assembles a file
called \.{foo.mms}, it produces a binary output file called \.{foo.mmo}.
(The suffix \.{mms} stands for ``\MMIX\ symbolic,'' and \.{mmo} stands
for ``\MMIX\ object.'') Such \.{mmo} files have a simple structure
consisting of a sequence of tetrabytes. Some of the tetrabytes are
instructions to a loading routine; others are data to be loaded.
@^object files@>
Loader instructions are distinguished from tetrabytes of data by their
first (most significant) byte, which has the special escape-code value
\Hex{98}, called |mm| in the program below. This code value corresponds
to \MMIX's opcode \.{LDVTS}, which is unlikely to occur in tetras of
data. The second byte~X of a loader instruction is the loader opcode,
called the {\it lopcode}. The third and fourth bytes, Y~and~Z, are
operands. Sometimes they are combined into a single 16-bit operand called~YZ.
@^lopcodes@>
@d mm 0x98 /* the escape code that begins every loader instruction in an \.{mmo} file */
@ A small, contrived example will help explain the basic ideas of \.{mmo}
format. Consider the following input file, called \.{test.mms}:
$$\obeyspaces\vbox{\halign{\tt#\hfil\cr
\% A peculiar example of MMIXAL\cr
\ LOC Data\_Segment \% location \#2000000000000000\cr
\ OCTA 1F \% a future reference\cr
a GREG @@ \% \$254 is base address for ABCD\cr
ABCD BYTE "ab" \% two bytes of data\cr
\ LOC \#123456789 \% switch to the instruction segment\cr
Main JMP 1F \% another future reference\cr
\ LOC @@+\#4000 \% skip past 16384 bytes\cr
2H LDB \$3,ABCD+1 \% use the base address\cr
\ BZ \$3,1F; TRAP \% and refer to the future again\cr
\# 3 "foo.mms" \% this comment is a line directive\cr
\ LOC 2B-4*10 \% move 10 tetras before previous location\cr
1H JMP 2B \% resolve previous references to 1F\cr
\ BSPEC 5 \% begin special data of type 5\cr
\ TETRA {\AM}a<<8 \% four bytes of special data\cr
\ WYDE a-\$0 \% two more bytes of special data\cr
\ ESPEC \% end a special data packet\cr
\ LOC ABCD+2 \% resume the data segment\cr
\ BYTE "cd",\#98 \% assemble three more bytes of data\cr
}}$$
It defines a silly program that essentially puts \.{'b'} into register~3;
the program halts when it gets to an all-zero \.{TRAP} instruction
following the~\.{BZ}. But the assembled output of this file illustrates most
of the features of \MMIX\ objects, and in fact \.{test.mms} was the
first test file tried by the author when the \MMIXAL\ processor was originally
written.
The binary output file \.{test.mmo} assembled from \.{test.mms} consists
of the following tetrabytes, shown in hexadecimal notation with brief
comments. Fuller explanations
appear with the descriptions of individual lopcodes below.
$$
\halign{\hskip.5in\tt#&\quad#\hfil\cr
98090101&|lop_pre| $1,1$ (preamble, version 1, 1 tetra)\cr
36f4a363&(the file creation time)\cr
% Sat Mar 20 23:44:35 1999
98012001&|lop_loc| $\Hex{20},1$ (data segment, 1 tetra)\cr
00000000&(low tetrabyte of address in data segment)\cr
00000000&(high tetrabyte of \.{OCTA} \.{1F})\cr
00000000&(low tetrabyte, will be fixed up later)\cr
61620000&(\.{"ab"}, padded with trailing zeros)\cr
\noalign{\penalty-200}
98010002&|lop_loc| $0,2$ (instruction segment, 2 tetras)\cr
00000001&(high tetrabyte of address in instruction segment)\cr
2345678c&(low tetrabyte of address, after alignment)\cr
98060002&|lop_file| $0,2$ (file name 0, 2 tetras)\cr
74657374&(\.{"test"})\cr
2e6d6d73&(\.{".mms"})\cr
98070007&|lop_line| 7 (line 7 of the current file)\cr
f0000000&(\.{JMP} \.{1F}, will be fixed up later)\cr
98024000&|lop_skip| \Hex{4000} (advance 16384 bytes)\cr
98070009&|lop_line| 9 (line 9 of the current file)\cr
8103fe01&(\.{LDB} \.{\$3,a,1}, uses base address \.a)\cr
42030000&(\.{BZ} \.{\$3,1F}, will be fixed later)\cr
9807000a&|lop_line| 10 (stay on line 10)\cr
00000000&(\.{TRAP})\cr
98010002&|lop_loc| $0,2$ (instruction segment, 2 tetras)\cr
00000001&(high tetrabyte of address in instruction segment)\cr
2345a768&(low tetrabyte of address \.{1H})\cr
98050010&|lop_fixrx| 16 (fix 16-bit relative address)\cr
0100fff5&(fixup for location \.{@@-4*-11})\cr
98040ff7&|lop_fixr| \Hex{ff7} (fix \.{@@-4*\#ff7})\cr
98032001&|lop_fixo| $\Hex{20},1$ (data segment, 1 tetra)\cr
00000000&(low tetrabyte of data segment address to fix)\cr
98060102&|lop_file| $1,2$ (file name 1, 2 tetras)\cr
666f6f2e&(\.{"foo."})\cr
6d6d7300&(\.{"mms",0})\cr
98070004&|lop_line| 4 (line 4 of the current file)\cr
f000000a&(\.{JMP} \.{2B})\cr
98080005&|lop_spec| 5 (begin special data of type 5)\cr
00000200&(\.{TETRA} \.{\&a<<8})\cr
00fe0000&(\.{WYDE} \.{a-\$0})\cr
98012001&|lop_loc| $\Hex{20},1$ (data segment, 1 tetra)\cr
0000000a&(low tetrabyte of address in data segment)\cr
00006364&(\.{"cd"} with leading zeros, because of alignment)\cr
98000001&|lop_quote| (don't treat next tetrabyte as a lopcode)\cr
98000000&(\.{BYTE} \.{\#98}, padded with trailing zeros)\cr
980a00fe&|lop_post| \$254 (begin postamble, G is 254)\cr
20000000&(high tetrabyte of the initial contents of \$254)\cr
00000008&(low tetrabyte of base address \$254)\cr
00000001&(high tetrabyte of the initial contents of \$255)\cr
2345678c&(low tetrabyte of \$255, is address of \.{Main})\cr
980b0000&|lop_stab| (begin symbol table)\cr
203a5040&(compressed form for symbol table as a ternary trie)\cr
50404020\cr
41204220\cr
43094408\cr
83404020&(\.{ABCD} = \Hex{2000000000000008}, serial 3)\cr
4d206120\cr
69056e01\cr
2345678c\cr
81400f61&(\.{Main} = \Hex{000000012345678c}, serial 1)\cr
fe820000&(\.{a} = \$254, serial 2)\cr
980c000a&|lop_end| (end symbol table, 10 tetras)\cr
}$$
@ When a tetrabyte of the \.{mmo} file does not begin with the escape code,
it is loaded into the current location~$\lambda$, and $\lambda$ is increased
to the next higher multiple of~4.
(If $\lambda$ is not a multiple of~4, the tetrabyte actually goes
into location $\lambda\land(-4)=4\lfloor\lambda/4\rfloor$, according
to \MMIX's usual conventions.) The current line number is also increased
by~1, if it is nonzero.
When a tetrabyte does begin with the escape code, its next byte
is the lopcode defining a loader instruction. There are thirteen lopcodes:
\bull |lop_quote|: $\rm X=\Hex{00}$, $\rm YZ=1$. Treat the next tetra as
an ordinary tetrabyte, even if it begins with the escape code.
\bull |lop_loc|: $\rm X=\Hex{01}$, $\rm Y=high$ byte, $\rm Z=tetra$ count
($\rm Z=1$~or~2). Set the current location to the 64-bit address defined
by the next Z tetras, plus $\rm 2^{56}Y$. Usually $\rm Y=0$ (for the
instruction segment) or $\rm Y=\Hex{20}$ (for the data segment).
If $\rm Z=2$, the high tetra appears first.
\bull |lop_skip|: $\rm X=\Hex{02}$, $\rm YZ=delta$. Increase the
current location by~YZ.
\bull |lop_fixo|: $\rm X=\Hex{03}$, $\rm Y=high$ byte, $\rm Z=tetra$ count
($\rm Z=1$~or~2). Load the value of the current location~$\lambda$ into
octabyte~P, where P~is the 64-bit address defined by the next Z tetras
plus $\rm2^{56}Y$ as in |lop_loc|. (The octabyte at~P was previously assembled
as zero because of a future reference.)
\bull |lop_fixr|: $\rm X=\Hex{04}$, $\rm YZ=delta$. Load YZ into the YZ~field
of the tetrabyte in location~P, where P~is
$\rm\lambda-4YZ$, namely the address that precedes the current location
by YZ~tetrabytes. (This tetrabyte was previously loaded with an \MMIX\
instruction that takes a relative address: a branch, probable branch,
\.{JMP}, \.{PUSHJ}, or~\.{GETA}. Its YZ~field was previously
assembled as zero because of a future reference.)
\bull |lop_fixrx|: $\rm X=\Hex{05}$, $\rm Y=0$, $\rm Z=16$ or 24.
Proceed as in |lop_fixr|,
but load $\delta$ into tetrabyte $\rm P=\lambda-4\delta$ instead of loading
YZ into $\rm P=\lambda-4YZ$. Here $\delta$ is the value of the tetrabyte
following the |lop_fixrx| instruction; its leading byte will be either
0 or~1. If the leading byte is~1, $\delta$ should be treated as the
{\it negative\/} number $(\delta\land\Hex{ffffff})-2^{\rm Z}$ when
calculating the address~P. (The latter case arises only rarely,
but it is needed when fixing up a relative ``future'' reference that
ultimately leads to a ``backward'' instruction. The value of~$\delta$ that
is xored into location~P in such cases will change \.{BZ} to \.{BZB},
or \.{JMP} to \.{JMPB}, etc.; we have $\rm Z=24$ when fixing a~\.{JMP},
$\rm Z=16$ otherwise.)
\bull |lop_file|: $\rm X=\Hex{06}$, $\rm Y=file$ number, $\rm Z=tetra$ count.
Set the current file number to~Y and the current line number to~zero. If this
file number has occurred previously, Z~should be zero; otherwise Z~should be
positive, and the next Z tetrabytes are the characters of the file name in
big-endian order.
Trailing zeros follow the file name if its length is not a multiple of~4.
\bull |lop_line|: $\rm X=\Hex{07}$, $\rm YZ=line$ number. Set the current line
number to~YZ\null. If the line number is nonzero, the current file and current
line should correspond to the source location that generated the next data to
be loaded, for use in diagnostic messages. (The \MMIXAL\ processor gives
precise line numbers to the sources of tetrabytes in segment~0, which tend to
be instructions, but not to the sources of tetrabytes assembled in other
segments.)
\bull |lop_spec|: $\rm X=\Hex{08}$, $\rm YZ=type$. Begin special data of
type~YZ\null. The subsequent tetrabytes, continuing until the next loader
operation other than |lop_quote|, comprise the special data. A |lop_quote|
instruction allows tetrabytes of special data to begin with the escape code.
\bull |lop_pre|: $\rm X=\Hex{09}$, $\rm Y=1$, $\rm Z=tetra$ count. A~|lop_pre|
instruction, which defines the ``preamble,'' must be the first tetrabyte of
every \.{mmo} file. The Y~field specifies the version number of \.{mmo}
format, currently~1; other version numbers may be defined later, but
version~1 should always be supported as described in the present document.
The Z~tetrabytes following a |lop_pre| command provide additional information
that might be of interest to system routines. If $\rm Z>0$, the first tetra
of additional information records the time that this \.{mmo} file was
created, measured in seconds since 00:00:00 Greenwich Mean Time on
1~Jan~1970.
\bull |lop_post|: $\rm X=\Hex{0a}$, $\rm Y=0$, $\rm Z=G$ (must be 32~or~more).
This instruction begins the {\it postamble}, which follows all instructions
and data to be loaded. It causes the loaded program to begin with rG equal to
the stated value of~G, and with \$G, \$$\rm(G+1)$, \dots,~\$255 initially set to
the values of the next $\rm(256-G)*2$ tetrabytes. These tetrabytes specify
$\rm 256-G$ octabytes in big-endian fashion (high half first).
\bull |lop_stab|: $\rm X=\Hex{0b}$, $\rm YZ=0$. This instruction must appear
immediately after the $\rm(256-G)*2$ tetrabytes following~|lop_post|. It is
followed by the symbol table, which lists the equivalents of all user-defined
symbols in a compact form that will be described later.
\bull |lop_end|: $\rm X=\Hex{0c}$, $\rm YZ=tetra$ count. This instruction
must be the very last tetrabyte of each \.{mmo} file. Furthermore,
exactly YZ tetrabytes must appear between it and the |lop_stab| command.
(Therefore a program can easily find the symbol table without reading
forward through the entire \.{mmo} file.)
\smallskip
A separate routine called \.{MMOtype} is available to translate
binary \.{mmo} files into human-readable form.
@d lop_quote 0x0 /* the quotation lopcode: the next tetra is data, not a loader instruction */
@d lop_loc 0x1 /* the location lopcode: set the current location */
@d lop_skip 0x2 /* the skip lopcode: increase the current location by YZ */
@d lop_fixo 0x3 /* the octabyte-fix lopcode: store the current location into an earlier octabyte */
@d lop_fixr 0x4 /* the relative-fix lopcode: fill in the YZ field of an earlier instruction */
@d lop_fixrx 0x5 /* extended relative-fix lopcode: like |lop_fixr|, with the delta in the next tetra */
@d lop_file 0x6 /* the file name lopcode: set the current file number */
@d lop_line 0x7 /* the file position lopcode: set the current line number */
@d lop_spec 0x8 /* the special hook lopcode: begin special data of type YZ */
@d lop_pre 0x9 /* the preamble lopcode: must be the first tetra of the file */
@d lop_post 0xa /* the postamble lopcode: gives G and the initial global registers */
@d lop_stab 0xb /* the symbol table lopcode: begins the symbol table */
@d lop_end 0xc /* the end-it-all lopcode: must be the last tetra of the file */
@ Many readers will have noticed that \MMIXAL\ has no facilities for
relocatable output, nor does \.{mmo} format support such features. The
author's first drafts of \MMIXAL\ and \.{mmo} did allow relocatable objects,
with external linkages, but the rules were substantially more complicated and
therefore inconsistent with the goals of {\sl The Art of Computer Programming}.
The present design might actually prove to be superior to the current
practice, now that computer memory is significantly cheaper than it
used to be, because one-pass assembly and loading are extremely fast when
relocatability and external linkages are disallowed. Different program modules
can be assembled together about as fast as they could be linked together under
a relocatable scheme, and they can communicate with each other in much more
flexible ways. Debugging tools are enhanced when open-source libraries are
combined with user programs, and such libraries will certainly improve in
quality when their source form is accessible to a larger community of users.
@* Basic data types.
This program for the 64-bit \MMIX\ architecture is based on 32-bit integer
arithmetic, because nearly every computer available to the author at the time
of writing was limited in that way.
Details of the basic arithmetic appear in a separate program module
called {\mc MMIX-ARITH}, because the same routines are needed also
for the simulators. The definition of type \&{tetra} should be changed, if
necessary, to conform with the definitions found in {\mc MMIX-ARITH}.
@^system dependencies@>
@<Type...@>=
typedef unsigned int tetra; /* a tetrabyte, the unit of all arithmetic here */
/* assumes that an int is exactly 32 bits wide */
typedef struct { tetra h,l;} octa; /* two tetrabytes make one octabyte;
  presumably |h| is the high half and |l| the low half */
typedef enum {@!false,@!true}@+@!bool; /* |false| is 0 and |true| is 1 */
@ @<Glob...@>=
extern octa zero_octa; /* an octabyte of zeros: |zero_octa.h=zero_octa.l=0| */
extern octa neg_one; /* an octabyte with all bits set: |neg_one.h=neg_one.l=-1| */
extern octa aux; /* auxiliary output of a subroutine, such as the remainder left by |odiv| */
extern bool overflow; /* set by certain subroutines for signed arithmetic */
@ Most of the subroutines in {\mc MMIX-ARITH} return an octabyte as
a function of two octabytes; for example, |oplus(y,z)| returns the
sum of octabytes |y| and~|z|. Division is the exception: |odiv(x,y,z)|
takes the high half of the dividend as its first argument~|x|, and it
returns the remainder in the global variable~|aux|.
@<Sub...@>=
extern octa oplus @,@,@[ARGS((octa y,octa z))@];
/* returns the unsigned sum $y+z$ */
extern octa ominus @,@,@[ARGS((octa y,octa z))@];
/* returns the unsigned difference $y-z$ */
extern octa incr @,@,@[ARGS((octa y,int delta))@];
/* unsigned $y+\delta$ ($\delta$ is signed) */
extern octa oand @,@,@[ARGS((octa y,octa z))@];
/* bitwise and, $y\land z$ */
extern octa shift_left @,@,@[ARGS((octa y,int s))@];
/* $y\LL s$, $0\le s\le64$ */
extern octa shift_right @,@,@[ARGS((octa y,int s,int u))@];
/* $y\GG s$; the shift is signed if |!u|, unsigned otherwise */
extern octa omult @,@,@[ARGS((octa y,octa z))@];
/* unsigned $(|aux|,x)=y\times z$, where $x$ is the octabyte returned */
extern octa odiv @,@,@[ARGS((octa x,octa y,octa z))@];
/* unsigned quotient $(x,y)/z$; the remainder is left in |aux|:
  $|aux|=(x,y)\bmod z$ */
@ Here's a rudimentary check to see if arithmetic is in trouble.
@<Init...@>=
acc=shift_left(neg_one,1); /* shift an octabyte of all ones left by one bit */
/* the |h| half should still be 32 one bits; otherwise |tetra| presumably
  isn't exactly 32 bits wide, and we give up */
if (acc.h!=0xffffffff) panic("Type tetra is not implemented correctly");
@.Type tetra...@>
@ Future versions of this program will work with symbols formed from Unicode
characters, but the present code limits itself to an 8-bit subset.
@^Unicode@>
The type \&{Char} is defined here in order to ease the later transition: