-
Notifications
You must be signed in to change notification settings - Fork 0
/
MoreStlc.v
2099 lines (1718 loc) · 67.1 KB
/
MoreStlc.v
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(** * MoreStlc: More on the Simply Typed Lambda-Calculus *)
Set Warnings "-notation-overridden,-parsing".
From PLF Require Import Maps.
From PLF Require Import Types.
From PLF Require Import Smallstep.
From PLF Require Import Stlc.
(* ################################################################# *)
(** * Simple Extensions to STLC *)
(** The simply typed lambda-calculus has enough structure to make its
theoretical properties interesting, but it is not much of a
programming language!
In this chapter, we begin to close the gap with real-world
languages by introducing a number of familiar features that have
straightforward treatments at the level of typing. *)
(* ================================================================= *)
(** ** Numbers *)
(** As we saw in exercise [stlc_arith] at the end of the [StlcProp]
chapter, adding types, constants, and primitive operations for
natural numbers is easy -- basically just a matter of combining
the [Types] and [Stlc] chapters. Adding more realistic
numeric types like machine integers and floats is also
straightforward, though of course the specifications of the
numeric primitives become more fiddly. *)
(* ================================================================= *)
(** ** Let Bindings *)
(** When writing a complex expression, it is useful to be able
to give names to some of its subexpressions to avoid repetition
and increase readability. Most languages provide one or more ways
of doing this. In OCaml (and Coq), for example, we can write [let
x=t1 in t2] to mean "reduce the expression [t1] to a value and
bind the name [x] to this value while reducing [t2]."
Our [let]-binder follows OCaml in choosing a standard
_call-by-value_ evaluation order, where the [let]-bound term must
be fully reduced before reduction of the [let]-body can begin.
The typing rule [T_Let] tells us that the type of a [let] can be
calculated by calculating the type of the [let]-bound term,
extending the context with a binding with this type, and in this
enriched context calculating the type of the body (which is then
the type of the whole [let] expression).
At this point in the book, it's probably easier simply to look at
the rules defining this new feature than to wade through a lot of
English text conveying the same information. Here they are: *)
(** Syntax:
t ::= Terms
| ... (other terms same as before)
| let x=t in t let-binding
*)
(**
Reduction:
t1 --> t1'
---------------------------------- (ST_Let1)
let x=t1 in t2 --> let x=t1' in t2
---------------------------- (ST_LetValue)
let x=v1 in t2 --> [x:=v1]t2
Typing:
Gamma |- t1 \in T1        x|->T1; Gamma |- t2 \in T2
-------------------------------------------------- (T_Let)
Gamma |- let x=t1 in t2 \in T2
*)
(* ================================================================= *)
(** ** Pairs *)
(** Our functional programming examples in Coq have made
frequent use of _pairs_ of values. The type of such a pair is
called a _product type_.
The formalization of pairs is almost too simple to be worth
discussing. However, let's look briefly at the various parts of
the definition to emphasize the common pattern. *)
(** In Coq, the primitive way of extracting the components of a pair
is _pattern matching_. An alternative is to take [fst] and
[snd] -- the first- and second-projection operators -- as
primitives. Just for fun, let's do our pairs this way. For
example, here's how we'd write a function that takes a pair of
numbers and returns the pair of their sum and difference:
\x : Nat*Nat,
let sum = x.fst + x.snd in
let diff = x.fst - x.snd in
(sum,diff)
*)
(** Adding pairs to the simply typed lambda-calculus, then, involves
adding two new forms of term -- pairing, written [(t1,t2)], and
projection, written [t.fst] for the first projection from [t] and
[t.snd] for the second projection -- plus one new type constructor,
[T1*T2], called the _product_ of [T1] and [T2]. *)
(** Syntax:
t ::= Terms
| ...
| (t,t) pair
| t.fst first projection
| t.snd second projection
v ::= Values
| ...
| (v,v) pair value
T ::= Types
| ...
| T * T product type
*)
(** For reduction, we need several new rules specifying how pairs and
projection behave.
t1 --> t1'
-------------------- (ST_Pair1)
(t1,t2) --> (t1',t2)
t2 --> t2'
-------------------- (ST_Pair2)
(v1,t2) --> (v1,t2')
t1 --> t1'
------------------ (ST_Fst1)
t1.fst --> t1'.fst
------------------ (ST_FstPair)
(v1,v2).fst --> v1
t1 --> t1'
------------------ (ST_Snd1)
t1.snd --> t1'.snd
------------------ (ST_SndPair)
(v1,v2).snd --> v2
*)
(** Rules [ST_FstPair] and [ST_SndPair] say that, when a fully
reduced pair meets a first or second projection, the result is
the appropriate component. The congruence rules [ST_Fst1] and
[ST_Snd1] allow reduction to proceed under projections, when the
term being projected from has not yet been fully reduced.
[ST_Pair1] and [ST_Pair2] reduce the parts of pairs: first the
left part, and then -- when a value appears on the left -- the right
part. The ordering arising from the use of the metavariables [v]
and [t] in these rules enforces a left-to-right evaluation
strategy for pairs. (Note the implicit convention that
metavariables like [v] and [v1] can only denote values.) We've
also added a clause to the definition of values, above, specifying
that [(v1,v2)] is a value. The fact that the components of a pair
value must themselves be values ensures that a pair passed as an
argument to a function will be fully reduced before the function
body starts executing. *)
(** The typing rules for pairs and projections are straightforward.
Gamma |- t1 \in T1        Gamma |- t2 \in T2
----------------------------------------- (T_Pair)
Gamma |- (t1,t2) \in T1*T2
Gamma |- t0 \in T1*T2
---------------------- (T_Fst)
Gamma |- t0.fst \in T1
Gamma |- t0 \in T1*T2
---------------------- (T_Snd)
Gamma |- t0.snd \in T2
*)
(** [T_Pair] says that [(t1,t2)] has type [T1*T2] if [t1] has
type [T1] and [t2] has type [T2]. Conversely, [T_Fst] and [T_Snd]
tell us that, if [t0] has a product type [T1*T2] (i.e., if it
will reduce to a pair), then the types of the projections from
this pair are [T1] and [T2]. *)
(* ================================================================= *)
(** ** Unit *)
(** Another handy base type, found especially in functional languages,
is the singleton type [Unit].
It has a single element -- the term constant [unit] (with a small
[u]) -- and a typing rule making [unit] an element of [Unit]. We
also add [unit] to the set of possible values -- indeed, [unit] is
the _only_ possible result of reducing an expression of type
[Unit]. *)
(** Syntax:
t ::= Terms
| ... (other terms same as before)
| unit unit
v ::= Values
| ...
| unit unit value
T ::= Types
| ...
| Unit unit type
Typing:
---------------------- (T_Unit)
Gamma |- unit \in Unit
*)
(** It may seem a little strange to bother defining a type that
has just one element -- after all, wouldn't every computation
living in such a type be trivial?
This is a fair question, and indeed in the STLC the [Unit] type is
not especially critical (though we'll see two uses for it below).
Where [Unit] really comes in handy is in richer languages with
_side effects_ -- e.g., assignment statements that mutate
variables or pointers, exceptions and other sorts of nonlocal
control structures, etc. In such languages, it is convenient to
have a type for the (trivial) result of an expression that is
evaluated only for its effect. *)
(* ================================================================= *)
(** ** Sums *)
(** Many programs need to deal with values that can take two distinct
forms. For example, we might identify students in a university
database using _either_ their name _or_ their id number. A search
function might return _either_ a matching value _or_ an error code.
These are specific examples of a binary _sum type_ (sometimes called
a _disjoint union_), which describes a set of values drawn from
one of two given types, e.g.:
Nat + Bool
*)
(** We create elements of these types by _tagging_ elements of
the component types. For example, if [n] is a [Nat] then [inl n]
is an element of [Nat+Bool]; similarly, if [b] is a [Bool] then
[inr b] is a [Nat+Bool]. The names of the tags [inl] and [inr]
arise from thinking of them as functions
inl \in Nat -> Nat + Bool
inr \in Bool -> Nat + Bool
that "inject" elements of [Nat] or [Bool] into the left and right
components of the sum type [Nat+Bool]. (But note that we don't
actually treat them as functions in the way we formalize them:
[inl] and [inr] are keywords, and [inl t] and [inr t] are primitive
syntactic forms, not function applications.) *)
(** In general, the elements of a type [T1 + T2] consist of the
elements of [T1] tagged with the token [inl], plus the elements of
[T2] tagged with [inr]. *)
(** As we've seen in Coq programming, one important use of sums is
signaling errors:
div \in Nat -> Nat -> (Nat + Unit)
div =
\x:Nat, \y:Nat,
test iszero y then
inr unit
else
inl ...
*)
(** The type [Nat + Unit] above is in fact isomorphic to [option
nat] in Coq -- i.e., it's easy to write functions that translate
back and forth. *)
(** To _use_ elements of sum types, we introduce a [case]
construct (a very simplified form of Coq's [match]) to destruct
them. For example, the following procedure converts a [Nat+Bool]
into a [Nat]:
getNat \in Nat+Bool -> Nat
getNat =
\x:Nat+Bool,
case x of
inl n => n
| inr b => test b then 1 else 0
*)
(** More formally... *)
(** Syntax:
t ::= Terms
| ... (other terms same as before)
| inl T t tagging (left)
| inr T t tagging (right)
| case t of case
inl x => t
| inr x => t
v ::= Values
| ...
| inl T v tagged value (left)
| inr T v tagged value (right)
T ::= Types
| ...
| T + T sum type
*)
(** Reduction:
t1 --> t1'
------------------------ (ST_Inl)
inl T2 t1 --> inl T2 t1'
t2 --> t2'
------------------------ (ST_Inr)
inr T1 t2 --> inr T1 t2'
t0 --> t0'
------------------------------------------- (ST_Case)
case t0 of inl x1 => t1 | inr x2 => t2 -->
case t0' of inl x1 => t1 | inr x2 => t2
----------------------------------------------- (ST_CaseInl)
case (inl T2 v1) of inl x1 => t1 | inr x2 => t2
--> [x1:=v1]t1
----------------------------------------------- (ST_CaseInr)
case (inr T1 v2) of inl x1 => t1 | inr x2 => t2
--> [x2:=v2]t2
*)
(** Typing:
Gamma |- t1 \in T1
------------------------------ (T_Inl)
Gamma |- inl T2 t1 \in T1 + T2
Gamma |- t2 \in T2
------------------------------- (T_Inr)
Gamma |- inr T1 t2 \in T1 + T2
Gamma |- t0 \in T1+T2
x1|->T1; Gamma |- t1 \in T3
x2|->T2; Gamma |- t2 \in T3
------------------------------------------------------ (T_Case)
Gamma |- case t0 of inl x1 => t1 | inr x2 => t2 \in T3
We use the type annotations on [inl] and [inr] to make the typing
relation deterministic (each term has at most one type), as we
did for functions. *)
(** Without this extra information, the typing rule [T_Inl], for
example, would have to say that, once we have shown that [t1] is
an element of type [T1], we can derive that [inl t1] is an element
of [T1 + T2] for _any_ type [T2]. For example, we could derive both
[inl 5 : Nat + Nat] and [inl 5 : Nat + Bool] (and infinitely many
other types). This peculiarity (technically, a failure of
uniqueness of types) would mean that we cannot build a
typechecking algorithm simply by "reading the rules from bottom to
top" as we could for all the other features seen so far.
There are various ways to deal with this difficulty. One simple
one -- which we've adopted here -- forces the programmer to
explicitly annotate the "other side" of a sum type when performing
an injection. This is a bit heavy for programmers (so real
languages adopt other solutions), but it is easy to understand and
formalize. *)
(* ================================================================= *)
(** ** Lists *)
(** The typing features we have seen can be classified into
_base types_ like [Bool], and _type constructors_ like [->] and
[*] that build new types from old ones. Another useful type
constructor is [List]. For every type [T], the type [List T]
describes finite-length lists whose elements are drawn from [T].
In principle, we could encode lists using pairs, sums and
_recursive_ types. But giving semantics to recursive types is
non-trivial. Instead, we'll just discuss the special case of lists
directly.
Below we give the syntax, semantics, and typing rules for lists.
Except for the fact that explicit type annotations are mandatory
on [nil] and cannot appear on [cons], these lists are essentially
identical to those we built in Coq. We use [case], rather than
[head] and [tail] operators, to destruct lists, to avoid dealing
with questions like "what is the [head] of the empty list?" *)
(** For example, here is a function that calculates the sum of
the first two elements of a list of numbers:
\x:List Nat,
case x of nil => 0
| a::x' => case x' of nil => a
| b::x'' => a+b
*)
(**
Syntax:
t ::= Terms
| ...
| nil T
| cons t t
| case t of nil => t
| x::x => t
v ::= Values
| ...
| nil T nil value
| cons v v cons value
T ::= Types
| ...
| List T list of Ts
*)
(** Reduction:
t1 --> t1'
-------------------------- (ST_Cons1)
cons t1 t2 --> cons t1' t2
t2 --> t2'
-------------------------- (ST_Cons2)
cons v1 t2 --> cons v1 t2'
t1 --> t1'
------------------------------------------- (ST_Lcase1)
(case t1 of nil => t2 | xh::xt => t3) -->
(case t1' of nil => t2 | xh::xt => t3)
------------------------------------------ (ST_LcaseNil)
(case nil T1 of nil => t2 | xh::xt => t3)
--> t2
------------------------------------------------ (ST_LcaseCons)
(case (cons vh vt) of nil => t2 | xh::xt => t3)
--> [xh:=vh,xt:=vt]t3
*)
(** Typing:
--------------------------- (T_Nil)
Gamma |- nil T1 \in List T1
Gamma |- t1 \in T1        Gamma |- t2 \in List T1
----------------------------------------------- (T_Cons)
Gamma |- cons t1 t2 \in List T1
Gamma |- t1 \in List T1
Gamma |- t2 \in T2
(h|->T1; t|->List T1; Gamma) |- t3 \in T2
--------------------------------------------------- (T_Lcase)
Gamma |- (case t1 of nil => t2 | h::t => t3) \in T2
*)
(* ================================================================= *)
(** ** General Recursion *)
(** Another facility found in most programming languages (including
Coq) is the ability to define recursive functions. For example,
we would like to be able to define the factorial function like
this:
fact = \x:Nat,
test x=0 then 1 else x * (fact (pred x))
Note that the right-hand side of this binder mentions the variable
being bound -- something that is not allowed by our formalization of
[let] above.
Directly formalizing this "recursive definition" mechanism is possible,
but it requires some extra effort: in particular, we'd have to
pass around an "environment" of recursive function definitions in
the definition of the [step] relation. *)
(** Here is another way of presenting recursive functions that is
a bit more verbose but equally powerful and much more straightforward
to formalize: instead of writing recursive definitions, we will define
a _fixed-point operator_ called [fix] that performs the "unfolding"
of the recursive definition in the right-hand side as needed, during
reduction.
For example, instead of
fact = \x:Nat,
test x=0 then 1 else x * (fact (pred x))
we will write:
fact =
fix
(\f:Nat->Nat,
\x:Nat,
test x=0 then 1 else x * (f (pred x)))
*)
(** We can derive the latter from the former as follows:
- In the right-hand side of the definition of [fact], replace
recursive references to [fact] by a fresh variable [f].
- Add an abstraction binding [f] at the front, with an
appropriate type annotation. (Since we are using [f] in place
of [fact], which had type [Nat->Nat], we should require [f]
to have the same type.) The new abstraction has type
[(Nat->Nat) -> (Nat->Nat)].
- Apply [fix] to this abstraction. This application has
type [Nat->Nat].
- Use all of this as the right-hand side of an ordinary
[let]-binding for [fact].
*)
(** The intuition is that the higher-order function [f] passed
to [fix] is a _generator_ for the [fact] function: if [f] is
applied to a function that "approximates" the desired behavior of
[fact] up to some number [n] (that is, a function that returns
correct results on inputs less than or equal to [n] but we don't
care what it does on inputs greater than [n]), then [f] returns a
slightly better approximation to [fact] -- a function that returns
correct results for inputs up to [n+1]. Applying [fix] to this
generator returns its _fixed point_, which is a function that
gives the desired behavior for all inputs [n].
(The term "fixed point" is used here in exactly the same sense as
in ordinary mathematics, where a fixed point of a function [f] is
an input [x] such that [f(x) = x]. Here, a fixed point of a
function [F] of type [(Nat->Nat)->(Nat->Nat)] is a function [f] of
type [Nat->Nat] such that [F f] behaves the same as [f].) *)
(** Syntax:
t ::= Terms
| ...
| fix t fixed-point operator
Reduction:
t1 --> t1'
------------------ (ST_Fix1)
fix t1 --> fix t1'
-------------------------------------------- (ST_FixAbs)
fix (\xf:T1.t1) --> [xf:=fix (\xf:T1.t1)] t1
Typing:
Gamma |- t1 \in T1->T1
---------------------- (T_Fix)
Gamma |- fix t1 \in T1
*)
(** Let's see how [ST_FixAbs] works by reducing [fact 3 = fix F 3],
where
F = (\f. \x. test x=0 then 1 else x * (f (pred x)))
(type annotations are omitted for brevity).
fix F 3
[-->] [ST_FixAbs] + [ST_App1]
(\x. test x=0 then 1 else x * (fix F (pred x))) 3
[-->] [ST_AppAbs]
test 3=0 then 1 else 3 * (fix F (pred 3))
[-->] [ST_Test0_Nonzero]
3 * (fix F (pred 3))
[-->] [ST_FixAbs + ST_Mult2]
3 * ((\x. test x=0 then 1 else x * (fix F (pred x))) (pred 3))
[-->] [ST_PredNat + ST_Mult2 + ST_App2]
3 * ((\x. test x=0 then 1 else x * (fix F (pred x))) 2)
[-->] [ST_AppAbs + ST_Mult2]
3 * (test 2=0 then 1 else 2 * (fix F (pred 2)))
[-->] [ST_Test0_Nonzero + ST_Mult2]
3 * (2 * (fix F (pred 2)))
[-->] [ST_FixAbs + 2 x ST_Mult2]
3 * (2 * ((\x. test x=0 then 1 else x * (fix F (pred x))) (pred 2)))
[-->] [ST_PredNat + 2 x ST_Mult2 + ST_App2]
3 * (2 * ((\x. test x=0 then 1 else x * (fix F (pred x))) 1))
[-->] [ST_AppAbs + 2 x ST_Mult2]
3 * (2 * (test 1=0 then 1 else 1 * (fix F (pred 1))))
[-->] [ST_Test0_Nonzero + 2 x ST_Mult2]
3 * (2 * (1 * (fix F (pred 1))))
[-->] [ST_FixAbs + 3 x ST_Mult2]
3 * (2 * (1 * ((\x. test x=0 then 1 else x * (fix F (pred x))) (pred 1))))
[-->] [ST_PredNat + 3 x ST_Mult2 + ST_App2]
3 * (2 * (1 * ((\x. test x=0 then 1 else x * (fix F (pred x))) 0)))
[-->] [ST_AppAbs + 3 x ST_Mult2]
3 * (2 * (1 * (test 0=0 then 1 else 0 * (fix F (pred 0)))))
[-->] [ST_Test0Zero + 3 x ST_Mult2]
3 * (2 * (1 * 1))
[-->] [ST_MultNats + 2 x ST_Mult2]
3 * (2 * 1)
[-->] [ST_MultNats + ST_Mult2]
3 * 2
[-->] [ST_MultNats]
6
*)
(** One important point to note is that, unlike [Fixpoint]
definitions in Coq, there is nothing to prevent functions defined
using [fix] from diverging. *)
(** **** Exercise: 1 star, standard, optional (halve_fix)
Translate this informal recursive definition into one using [fix]:
halve =
\x:Nat,
test x=0 then 0
else test (pred x)=0 then 0
else 1 + (halve (pred (pred x)))
(* FILL IN HERE *)
*)
(** [] *)
(** **** Exercise: 1 star, standard, optional (fact_steps)
Write down the sequence of steps that the term [fact 1] goes
through to reduce to a normal form (assuming the usual reduction
rules for arithmetic operations).
(* FILL IN HERE *)
*)
(** [] *)
(** The ability to form the fixed point of a function of type [T->T]
for any [T] has some surprising consequences. In particular, it
implies that _every_ type is inhabited by some term. To see this,
observe that, for every type [T], we can define the term
fix (\x:T,x)
By [T_Fix] and [T_Abs], this term has type [T]. By [ST_FixAbs]
it reduces to itself, over and over again. Thus it is a
_diverging element_ of [T].
More usefully, here's an example using [fix] to define a
two-argument recursive function:
equal =
fix
(\eq:Nat->Nat->Bool,
\m:Nat, \n:Nat,
test m=0 then iszero n
else test n=0 then fls
else eq (pred m) (pred n))
*)
(** And finally, here is an example where [fix] is used to define a
_pair_ of recursive functions (illustrating the fact that the type
[T1] in the rule [T_Fix] need not be a function type):
evenodd =
fix
(\eo: ((Nat->Bool) * (Nat->Bool)),
let e = \n:Nat, test n=0 then tru else eo.snd (pred n) in
let o = \n:Nat, test n=0 then fls else eo.fst (pred n) in
(e,o))
even = evenodd.fst
odd = evenodd.snd
*)
(* ================================================================= *)
(** ** Records *)
(** As a final example of a basic extension of the STLC, let's look
briefly at how to define _records_ and their types. Intuitively,
records can be obtained from pairs by two straightforward
generalizations: they are n-ary (rather than just binary) and
their fields are accessed by _label_ (rather than position). *)
(** Syntax:
t ::= Terms
| ...
| {i1=t1, ..., in=tn} record
| t.i projection
v ::= Values
| ...
| {i1=v1, ..., in=vn} record value
T ::= Types
| ...
| {i1:T1, ..., in:Tn} record type
*)
(** The generalization from products should be pretty obvious. But
it's worth noticing the ways in which what we've actually written is
even _more_ informal than the informal syntax we've used in previous
sections and chapters: we've used "[...]" in several places to mean "any
number of these," and we've omitted explicit mention of the usual
side condition that the labels of a record should not contain any
repetitions. *)
(**
Reduction:
ti --> ti'
------------------------------------ (ST_Rcd)
{i1=v1, ..., im=vm, in=ti , ...}
--> {i1=v1, ..., im=vm, in=ti', ...}
t0 --> t0'
-------------- (ST_Proj1)
t0.i --> t0'.i
------------------------- (ST_ProjRcd)
{..., i=vi, ...}.i --> vi
*)
(** Again, these rules are a bit informal. For example, the first rule
is intended to be read "if [ti] is the leftmost field that is not a
value and if [ti] steps to [ti'], then the whole record steps..."
In the last rule, the intention is that there should be only one
field called [i], and that all the other fields must contain values. *)
(**
The typing rules are also simple:
Gamma |- t1 \in T1 ... Gamma |- tn \in Tn
---------------------------------------------------- (T_Rcd)
Gamma |- {i1=t1, ..., in=tn} \in {i1:T1, ..., in:Tn}
Gamma |- t0 \in {..., i:Ti, ...}
-------------------------------- (T_Proj)
Gamma |- t0.i \in Ti
*)
(** There are several ways to approach formalizing the above
definitions.
- We can directly formalize the syntactic forms and inference
rules, staying as close as possible to the form we've given
them above. This is conceptually straightforward, and it's
probably what we'd want to do if we were building a real
compiler (in particular, it will allow us to print error
messages in the form that programmers will find easy to
understand). But the formal versions of the rules will not be
very pretty or easy to work with, because all the [...]s above
will have to be replaced with explicit quantifications or
comprehensions. For this reason, records are not included in
the extended exercise at the end of this chapter. (It is
still useful to discuss them informally here because they will
help motivate the addition of subtyping to the type system
when we get to the [Sub] chapter.)
- Alternatively, we could look for a smoother way of presenting
records -- for example, a binary presentation with one
constructor for the empty record and another constructor for
adding a single field to an existing record, instead of a
single monolithic constructor that builds a whole record at
once. This is the right way to go if we are primarily
interested in studying the metatheory of the calculi with
records, since it leads to clean and elegant definitions and
proofs. Chapter [Records] shows how this can be done.
- Finally, if we like, we can avoid formalizing records
altogether, by stipulating that record notations are just
informal shorthands for more complex expressions involving
pairs and product types. We sketch this approach in the next
section. *)
(* ----------------------------------------------------------------- *)
(** *** Encoding Records (Optional) *)
(** Let's see how records can be encoded using just pairs and
[unit]. (This clever encoding, as well as the observation that it
also extends to systems with subtyping, is due to Luca Cardelli.)
First, observe that we can encode arbitrary-size _tuples_ using
nested pairs and the [unit] value. To avoid overloading the pair
notation [(t1,t2)], we'll use curly braces without labels to write
down tuples, so [{}] is the empty tuple, [{5}] is a singleton
tuple, [{5,6}] is a 2-tuple (morally the same as a pair),
[{5,6,7}] is a triple, etc.
{}                 ----> unit
{t1, t2, ..., tn}  ----> (t1, trest)
                         where {t2, ..., tn} ----> trest
Similarly, we can encode tuple types using nested product types:
{}                 ----> Unit
{T1, T2, ..., Tn}  ----> T1 * TRest
                         where {T2, ..., Tn} ----> TRest
The operation of projecting a field from a tuple can be encoded
using a sequence of second projections followed by a first
projection:
t.0      ----> t.fst
t.(n+1)  ----> (t.snd).n
Next, suppose that there is some total ordering on record labels,
so that we can associate each label with a unique natural number.
This number is called the _position_ of the label. For example,
we might assign positions like this:
LABEL   POSITION
a       0
b       1
c       2
...     ...
bar     1395
...     ...
foo     4460
...     ...
We use these positions to encode record values as tuples (i.e., as
nested pairs) by sorting the fields according to their positions.
For example:
{a=5,b=6}      ----> {5,6}
{a=5,c=7}      ----> {5,unit,7}
{c=7,a=5}      ----> {5,unit,7}
{c=5,b=3}      ----> {unit,3,5}
{f=8,c=5,a=7}  ----> {7,unit,5,unit,unit,8}
{f=8,c=5}      ----> {unit,unit,5,unit,unit,8}
Note that each field appears in the position associated with its
label, that the size of the tuple is determined by the label with
the highest position, and that we fill in unused positions with
[unit].
We do exactly the same thing with record types:
{a:Nat,b:Nat}  ----> {Nat,Nat}
{c:Nat,a:Nat}  ----> {Nat,Unit,Nat}
{f:Nat,c:Nat}  ----> {Unit,Unit,Nat,Unit,Unit,Nat}
Finally, record projection is encoded as a tuple projection from
the appropriate position:
t.l ----> t.(position of l)
It is not hard to check that all the typing rules for the original
"direct" presentation of records are validated by this
encoding. (The reduction rules are "almost validated" -- not
quite, because the encoding reorders fields.) *)
(** Of course, this encoding will not be very efficient if we
happen to use a record with label [foo]! But things are not
actually as bad as they might seem: for example, if we assume that
our compiler can see the whole program at the same time, we can
_choose_ the numbering of labels so that we assign small positions
to the most frequently used labels. Indeed, there are industrial
compilers that essentially do this! *)
(* ----------------------------------------------------------------- *)
(** *** Variants (Optional) *)
(** Just as products can be generalized to records, sums can be
generalized to n-ary labeled types called _variants_. Instead of
[T1+T2], we can write something like [<l1:T1,l2:T2,...,ln:Tn>]
where [l1],[l2],... are field labels which are used both to build
instances and as case arm labels.
These n-ary variants give us almost enough mechanism to build
arbitrary inductive data types like lists and trees from
scratch -- the only thing missing is a way to allow _recursion_ in
type definitions. We won't cover this here, but detailed
treatments can be found in many textbooks -- e.g., Types and
Programming Languages [Pierce 2002] (in Bib.v). *)
(* ################################################################# *)
(** * Exercise: Formalizing the Extensions *)
Module STLCExtended.
(** **** Exercise: 3 stars, standard (STLCE_definitions)
In this series of exercises, you will formalize some of the
extensions described in this chapter. We've provided the
necessary additions to the syntax of terms and types, and we've
included a few examples that you can test your definitions with
to make sure they are working as expected. You'll fill in the
rest of the definitions and extend all the proofs accordingly.
To get you started, we've provided implementations for:
- numbers
- sums
- lists
- unit
You need to complete the implementations for:
- pairs
- let (which involves binding)
- [fix]
A good strategy is to work on the extensions one at a time, in two
passes, rather than trying to work through the file from start to
finish in a single pass. For each definition or proof, begin by
reading carefully through the parts that are provided for you,
referring to the text in the [Stlc] chapter for high-level
intuitions and the embedded comments for detailed mechanics. *)
(* ----------------------------------------------------------------- *)
(** *** Syntax *)
(** Types of the extended STLC: one constructor per type former
    discussed in this chapter.  No boolean type is included. *)
Inductive ty : Type :=
(* function (arrow) type, built from two component types *)
| Ty_Arrow : ty -> ty -> ty
(* base type of natural numbers *)
| Ty_Nat : ty
(* sum type [T1 + T2] *)
| Ty_Sum : ty -> ty -> ty
(* list type [List T] *)
| Ty_List : ty -> ty
(* singleton type [Unit] *)
| Ty_Unit : ty
(* product type [T1 * T2] *)
| Ty_Prod : ty -> ty -> ty.
(** Terms of the extended STLC.  Variable names are [string]s, and every
    binding form carries the name(s) it binds as explicit [string]
    arguments.  The constructors are grouped by language feature. *)
Inductive tm : Type :=
(* pure STLC *)
| tm_var : string -> tm
| tm_app : tm -> tm -> tm
| tm_abs : string -> ty -> tm -> tm
(* i.e., an abstraction: bound variable name, type annotation, body *)
(* numbers *)
| tm_const: nat -> tm
| tm_succ : tm -> tm
| tm_pred : tm -> tm
| tm_mult : tm -> tm -> tm
| tm_if0 : tm -> tm -> tm -> tm
(* i.e., [if0 t1 then t2 else t3], a combined zero test and conditional *)
(* sums *)
(* the [ty] argument of [tm_inl] / [tm_inr] is the programmer-supplied
   annotation for the other side of the sum type *)
| tm_inl : ty -> tm -> tm
| tm_inr : ty -> tm -> tm
| tm_case : tm -> string -> tm -> string -> tm -> tm
(* i.e., [case t0 of inl x1 => t1 | inr x2 => t2] *)
(* lists *)
(* [tm_nil] carries a mandatory type annotation; [tm_cons] carries none *)
| tm_nil : ty -> tm
| tm_cons : tm -> tm -> tm
| tm_lcase : tm -> tm -> string -> string -> tm -> tm
(* i.e., [case t1 of | nil => t2 | x::y => t3] *)
(* unit *)
| tm_unit : tm
(* You are going to be working on the following extensions: *)
(* pairs *)
| tm_pair : tm -> tm -> tm
| tm_fst : tm -> tm
| tm_snd : tm -> tm
(* let *)
| tm_let : string -> tm -> tm -> tm
(* i.e., [let x = t1 in t2] *)
(* fix *)
| tm_fix : tm -> tm.
(* i.e., the fixed-point operator [fix t] *)
(** Note that, for brevity, we've omitted booleans and instead
provided a single [if0] form combining a zero test and a
conditional. That is, instead of writing
if x = 0 then ... else ...
we'll write this:
if0 x then ... else ...
*)
(* Convenience constants: the one-letter identifiers x, y and z as [string]s. *)
Definition x : string := "x".
Definition y : string := "y".
Definition z : string := "z".
(* Register each constant as an unfolding hint in the [core] hint database. *)
(* NOTE(review): these hints carry no explicit locality attribute; recent Coq
   releases may warn about that -- confirm against the targeted Coq version. *)
Hint Unfold x : core.
Hint Unfold y : core.
Hint Unfold z : core.
(* Declare a new custom grammar entry named [stlc_ty] (presumably used for the
   concrete syntax of types -- confirm against the notations that follow). *)
Declare Custom Entry stlc_ty.
(* [<{ e }>] parses [e] in the [stlc] custom entry and denotes [e] itself.
   The [stlc] entry is not declared in this part of the file; presumably it
   comes from the imported [Stlc] chapter -- confirm. *)
Notation "<{ e }>" := e (e custom stlc at level 99).
(* [<{{ e }}>] parses [e] in the [stlc_ty] entry declared above and denotes
   [e] itself. *)
Notation "<{{ e }}>" := e (e custom stlc_ty at level 99).