From f9fc51bb573fb731a09f3948644a4f05d0a878bd Mon Sep 17 00:00:00 2001 From: Jeff Fifield Date: Fri, 19 Apr 2024 08:32:42 -0600 Subject: [PATCH] update programming examples again (#1317) Co-authored-by: singagan <53442471+singagan@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- aie_kernels/aie2/conv2dk1.cc | 413 +++++ aie_kernels/aie2/conv2dk1.h | 25 + aie_kernels/aie2/conv2dk1_i8.cc | 224 +++ aie_kernels/aie2/conv2dk1_i8.h | 22 + aie_kernels/aie2/conv2dk1_skip.cc | 766 +++++++++ aie_kernels/aie2/conv2dk1_skip.h | 31 + aie_kernels/aie2/conv2dk1_skip_init.cc | 934 +++++++++++ aie_kernels/aie2/conv2dk1_skip_init.h | 33 + aie_kernels/aie2/conv2dk3.cc | 1434 +++++++++++++++++ aie_kernels/aie2/conv2dk3.h | 33 + .../ml/bottleneck/CMakeLists.txt | 89 + programming_examples/ml/bottleneck/Makefile | 40 + programming_examples/ml/bottleneck/README.md | 125 ++ programming_examples/ml/bottleneck/aie2.py | 639 ++++++++ .../ml/bottleneck/bottleneck_block.png | Bin 0 -> 227382 bytes .../ml/bottleneck/requirements.txt | 1 + programming_examples/ml/bottleneck/run.lit | 12 + programming_examples/ml/bottleneck/test.py | 190 +++ programming_examples/ml/conv2d/CMakeLists.txt | 89 + programming_examples/ml/conv2d/Makefile | 35 + programming_examples/ml/conv2d/README.md | 67 + programming_examples/ml/conv2d/aie2.py | 263 +++ .../ml/conv2d/requirements.txt | 1 + programming_examples/ml/conv2d/run.lit | 10 + programming_examples/ml/conv2d/test.py | 149 ++ .../ml/conv2d_fused_relu/CMakeLists.txt | 89 + .../ml/conv2d_fused_relu/Makefile | 35 + .../ml/conv2d_fused_relu/README.md | 99 ++ .../ml/conv2d_fused_relu/aie2.py | 263 +++ .../ml/conv2d_fused_relu/requirements.txt | 1 + .../ml/conv2d_fused_relu/run.lit | 10 + .../ml/conv2d_fused_relu/test.py | 151 ++ programming_examples/ml/resnet/README.md | 121 ++ .../ml/resnet/layers_conv2_x/CMakeLists.txt | 89 + .../ml/resnet/layers_conv2_x/Makefile | 50 + 
.../ml/resnet/layers_conv2_x/aie.mlir | 1014 ++++++++++++ .../ml/resnet/layers_conv2_x/aie2.py | 639 ++++++++ .../ml/resnet/layers_conv2_x/requirements.txt | 1 + .../ml/resnet/layers_conv2_x/run.lit | 14 + .../ml/resnet/layers_conv2_x/test.py | 436 +++++ 40 files changed, 8637 insertions(+) create mode 100755 aie_kernels/aie2/conv2dk1.cc create mode 100755 aie_kernels/aie2/conv2dk1.h create mode 100644 aie_kernels/aie2/conv2dk1_i8.cc create mode 100644 aie_kernels/aie2/conv2dk1_i8.h create mode 100755 aie_kernels/aie2/conv2dk1_skip.cc create mode 100755 aie_kernels/aie2/conv2dk1_skip.h create mode 100755 aie_kernels/aie2/conv2dk1_skip_init.cc create mode 100755 aie_kernels/aie2/conv2dk1_skip_init.h create mode 100755 aie_kernels/aie2/conv2dk3.cc create mode 100755 aie_kernels/aie2/conv2dk3.h create mode 100644 programming_examples/ml/bottleneck/CMakeLists.txt create mode 100755 programming_examples/ml/bottleneck/Makefile create mode 100644 programming_examples/ml/bottleneck/README.md create mode 100644 programming_examples/ml/bottleneck/aie2.py create mode 100644 programming_examples/ml/bottleneck/bottleneck_block.png create mode 100644 programming_examples/ml/bottleneck/requirements.txt create mode 100644 programming_examples/ml/bottleneck/run.lit create mode 100644 programming_examples/ml/bottleneck/test.py create mode 100644 programming_examples/ml/conv2d/CMakeLists.txt create mode 100755 programming_examples/ml/conv2d/Makefile create mode 100644 programming_examples/ml/conv2d/README.md create mode 100644 programming_examples/ml/conv2d/aie2.py create mode 100644 programming_examples/ml/conv2d/requirements.txt create mode 100644 programming_examples/ml/conv2d/run.lit create mode 100644 programming_examples/ml/conv2d/test.py create mode 100644 programming_examples/ml/conv2d_fused_relu/CMakeLists.txt create mode 100755 programming_examples/ml/conv2d_fused_relu/Makefile create mode 100644 programming_examples/ml/conv2d_fused_relu/README.md create mode 100644 
programming_examples/ml/conv2d_fused_relu/aie2.py create mode 100644 programming_examples/ml/conv2d_fused_relu/requirements.txt create mode 100644 programming_examples/ml/conv2d_fused_relu/run.lit create mode 100644 programming_examples/ml/conv2d_fused_relu/test.py create mode 100755 programming_examples/ml/resnet/README.md create mode 100755 programming_examples/ml/resnet/layers_conv2_x/CMakeLists.txt create mode 100755 programming_examples/ml/resnet/layers_conv2_x/Makefile create mode 100755 programming_examples/ml/resnet/layers_conv2_x/aie.mlir create mode 100755 programming_examples/ml/resnet/layers_conv2_x/aie2.py create mode 100755 programming_examples/ml/resnet/layers_conv2_x/requirements.txt create mode 100755 programming_examples/ml/resnet/layers_conv2_x/run.lit create mode 100755 programming_examples/ml/resnet/layers_conv2_x/test.py diff --git a/aie_kernels/aie2/conv2dk1.cc b/aie_kernels/aie2/conv2dk1.cc new file mode 100755 index 0000000000..08eb7312e9 --- /dev/null +++ b/aie_kernels/aie2/conv2dk1.cc @@ -0,0 +1,413 @@ +//===- conv2dk1.cc -------------------------------------------------*- C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +// #define __AIENGINE__ 1 +#define __AIENGINE__ 2 +#define NOCPP +#define __AIEARCH__ 20 + +#include +#include +#include + +#include + +#define REL_WRITE 0 +#define REL_READ 1 + +#ifdef SCALAR + +const int32_t UMAX = 255; + +#ifdef INT8_ACT + +//***************************************************************************** +// conv2d 1x1 - scalar +// act: int8, wts: int8, out: uint8 +//***************************************************************************** +void conv2dk1_i8_scalar(int8_t *input, int8_t *kernels, uint8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale) { + event0(); + + int x, ic, oc, ic8, oc8; + // scale=-17; + for (oc = 0; oc < output_channels / 8; oc++) { + for (x = 0; x < input_width; x++) { // col of output image + for (oc8 = 0; oc8 < 8; oc8++) { + int sum = 0; + int sum_srs = 0; + + for (ic = 0; ic < input_channels / 8; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + int val = input[(ic * input_width * 8) + (x * 8) + ic8]; + int k = kernels[(oc * (input_channels / 8) * 64) + (ic * 64) + + (ic8 * 8) + oc8]; + sum += val * k; + } + } + + // sum_srs=sum>>scale; + sum_srs = (sum + (1 << (scale - 1))) >> scale; + sum_srs = (sum_srs > UMAX) ? UMAX : (sum_srs < 0) ? 
0 : sum_srs; + // sum_srs = input[(oc*input_width*8) + (x*8) + oc8]; + output[(oc * input_width * 8) + (x * 8) + oc8] = sum_srs; + } + } + } + + event1(); +} + +#else // UINT8_ACT + +//***************************************************************************** +// conv2d 1x1 - scalar +// act: uint8, wts: int8, out: uint8 +//***************************************************************************** +void conv2dk1_ui8_scalar(uint8_t *input, int8_t *kernels, uint8_t *output, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, const int scale) { + event0(); + + int x, ic, oc, ic8, oc8; + // scale=-17; + for (oc = 0; oc < output_channels / 8; oc++) { + for (x = 0; x < input_width; x++) { // col of output image + for (oc8 = 0; oc8 < 8; oc8++) { + int sum = 0; + int sum_srs = 0; + + for (ic = 0; ic < input_channels / 8; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + uint8_t val = input[(ic * input_width * 8) + (x * 8) + ic8]; + int8_t k = kernels[(oc * (input_channels / 8) * 64) + (ic * 64) + + (ic8 * 8) + oc8]; + sum += val * k; + } + } + + // sum_srs=sum>>scale; + sum_srs = (sum + (1 << (scale - 1))) >> scale; + sum_srs = (sum_srs > UMAX) ? UMAX : (sum_srs < 0) ? 0 : sum_srs; + // sum_srs = input[(oc*input_width*8) + (x*8) + oc8]; + output[(oc * input_width * 8) + (x * 8) + oc8] = sum_srs; + } + } + } + + event1(); +} + +#endif // UINT8_ACT + +#else // Vector + +#ifdef INT8_ACT + +//***************************************************************************** +// conv2d 1x1 - vector +// act: int8, wts: int8, out: uint8 +// +// Assume IC >= 16 as that gives ideal inner loop schedule +// +// TODO - Restricting input_width is mutiple of 32 +// Because each VMAC works on 4 inputs at a time and we store intermediate +// results in 8 accumulators, having input_width be a multiple of 4*8=32 is +// ideal. 
However, we should be able to support input_width that is only a +// multiple of 4 but there is some strange scheduling happening now so for +// now, we do not. +//***************************************************************************** +void conv2dk1_i8_vector(int8_t *input, int8_t *kernels, uint8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale) { + event0(); + + using MMUL4x8x8 = aie::mmul<4, 8, 8, int8, int8>; + ::aie::set_saturation( + aie::saturation_mode::saturate); // Needed to saturate properly to uint8 + ::aie::set_rounding( + aie::rounding_mode::positive_inf); // Needed to saturate properly to uint8 + + uint8_t *restrict out_ptr = output; + + const int scaleT = scale; + + MMUL4x8x8 acc_tmp[8]; + for (int x = 0; x < 8; x++) { + acc_tmp[x] = aie::zeros(); + } + + // TODO Keeping this variable gives a wrong behavior and bad schedule! + const int iw = input_width; + const int iw_32 = (input_width / 4) / 8; + + // const int iw_32_rem = (input_width / 4) % 8; + // const int iw_32_rem = (32 / 4) % 8; + assert((input_width / 4) % 8 == 0); + const int iw_32_rem = 0; // TODO - See restriction + + assert((input_channels / 8) > 2); // Assume IC >= 16 + + if (iw_32 > 0) { + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int iw_32c = 0; iw_32c < iw_32; iw_32c++) { + for (int ic = 0; ic < (input_channels / 8); ic++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x = 0; x < 8; x++) { + aie::vector in_a = aie::load_v<32>(input); + input += 32; // act oc0..3(ic0..7) + acc_tmp[x].mac(in_a, in_b); + } + input += (iw * 8) - 256; // Move to next ic/8 position + } + // input ptr just moves to next section + for (int xx = 0; xx < 8; xx++) { + aie::vector o1 = acc_tmp[xx].to_vector(scaleT); + aie::store_v(out_ptr, o1); + out_ptr += 32; + acc_tmp[xx] = aie::zeros(); + } + input -= 
((input_channels / 8) * iw * 8) - + 256; // reset to next input_width/32 block + kernels -= + (input_channels / 8) * 64; // reset kernel back to beginning of ic/8 + } + input -= (iw_32) * 256; // 8*32, reset beginning of input ptr + kernels += (input_channels / 8) * 64; // move to next oc/8 weights + out_ptr += (iw_32_rem * + 32); // move to next oc/8 (skip remainder section if present) + } + + } // if(iw_32 > 0) { + + if (iw_32_rem > 0) { + + const int ocs = output_channels; + const int ics = input_channels; + + for (int oc = 0; oc < (ocs / 8); oc++) { + for (int ic = 0; ic < (ics / 8); ic++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x = 0; x < iw_32_rem; x++) { + aie::vector in_a = aie::load_v<32>(input); + input += 32; // act oc0..3(ic0..7) + acc_tmp[x].mac(in_a, in_b); + } + input += (iw * 8) - (iw_32_rem * 32); // Move to next ic/8 position + } + // input ptr just moves to next section + for (int xx = 0; xx < iw_32_rem; xx++) { + aie::vector o1 = acc_tmp[xx].to_vector(scaleT); + aie::store_v(out_ptr, o1); + out_ptr += 32; + acc_tmp[xx] = aie::zeros(); + } + // input -= ((ics-1)/8)*(iw*8)+(iw_32_rem*32); // reset to beginning of + // input ptr for remainder + input -= 448; // reset to beginning of input ptr for remainder + // kernel ptr already at next oc/8 + out_ptr += (iw * 8) - + (iw_32_rem * + 32); // move to next oc/8 (skip remainder section if present) + } + + } // if(iw_32_rem > 0) + + event1(); +} + +#else // UINT8_ACT + +//***************************************************************************** +// conv2d 1x1 - vector +// act: uint8, wts: int8, out: uint8 +// +// Assume IC >= 16 as that gives ideal inner loop schedule +// +// TODO - Restricting input_width is mutiple of 32 +// Because each VMAC works on 4 inputs at a time and we store intermediate +// results in 8 accumulators, having input_width be a multiple of 4*8=32 is +// ideal. 
However, we should be able to support input_width that is only a +// multiple of 4 but there is some strange scheduling happening now so for +// now, we do not. +//***************************************************************************** +void conv2dk1_ui8_vector(uint8_t *input, int8_t *kernels, uint8_t *output, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, const int scale) { + event0(); + + using MMUL4x8x8 = aie::mmul<4, 8, 8, uint8, int8>; + ::aie::set_saturation( + aie::saturation_mode::saturate); // Needed to saturate properly to uint8 + ::aie::set_rounding( + aie::rounding_mode::positive_inf); // Needed to saturate properly to uint8 + + uint8_t *restrict out_ptr = output; + + const int scaleT = scale; + + MMUL4x8x8 acc_tmp[8]; + for (int x = 0; x < 8; x++) { + acc_tmp[x] = aie::zeros(); + } + + // TODO Keeping this variable gives a wrong behavior and bad schedule! + const int iw = input_width; + const int iw_32 = (input_width / 4) / 8; + + // const int iw_32_rem = (input_width / 4) % 8; + // const int iw_32_rem = (32 / 4) % 8; + assert((input_width / 4) % 8 == 0); + const int iw_32_rem = 0; // TODO - See restriction + + assert((input_channels / 8) > 2); // Assume IC >= 16 + + if (iw_32 > 0) { + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int iw_32c = 0; iw_32c < iw_32; iw_32c++) { + for (int ic = 0; ic < (input_channels / 8); ic++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x = 0; x < 8; x++) { + aie::vector in_a = aie::load_v<32>(input); + input += 32; // act oc0..3(ic0..7) + acc_tmp[x].mac(in_a, in_b); + } + input += (iw * 8) - 256; // Move to next ic/8 position + } + // input ptr just moves to next section + for (int xx = 0; xx < 8; xx++) { + aie::vector o1 = acc_tmp[xx].to_vector(scaleT); + aie::store_v(out_ptr, o1); + out_ptr += 32; + acc_tmp[xx] = aie::zeros(); + } + input -= 
((input_channels / 8) * iw * 8) - + 256; // reset to next input_width/32 block + kernels -= + (input_channels / 8) * 64; // reset kernel back to beginning of ic/8 + } + input -= (iw_32) * 256; // 8*32, reset beginning of input ptr + kernels += (input_channels / 8) * 64; // move to next oc/8 weights + out_ptr += (iw_32_rem * + 32); // move to next oc/8 (skip remainder section if present) + } + + } // if(iw_32 > 0) { + + if (iw_32_rem > 0) { + + const int ocs = output_channels; + const int ics = input_channels; + + for (int oc = 0; oc < (ocs / 8); oc++) { + for (int ic = 0; ic < (ics / 8); ic++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x = 0; x < iw_32_rem; x++) { + aie::vector in_a = aie::load_v<32>(input); + input += 32; // act oc0..3(ic0..7) + acc_tmp[x].mac(in_a, in_b); + } + input += (iw * 8) - (iw_32_rem * 32); // Move to next ic/8 position + } + // input ptr just moves to next section + for (int xx = 0; xx < iw_32_rem; xx++) { + aie::vector o1 = acc_tmp[xx].to_vector(scaleT); + aie::store_v(out_ptr, o1); + out_ptr += 32; + acc_tmp[xx] = aie::zeros(); + } + // input -= ((ics-1)/8)*(iw*8)+(iw_32_rem*32); // reset to beginning of + // input ptr for remainder + input -= 448; // reset to beginning of input ptr for remainder + // kernel ptr already at next oc/8 + out_ptr += (iw * 8) - + (iw_32_rem * + 32); // move to next oc/8 (skip remainder section if present) + } + + } // if(iw_32_rem > 0) + + event1(); +} + +#endif // UINT8_ACT + +#endif // Vector + +//***************************************************************************** +// conv2d 1x1 wrappers +//***************************************************************************** +extern "C" { + +#ifdef SCALAR + +#ifdef INT8_ACT + +void conv2dk1_i8(int8_t *input, int8_t *kernels, uint8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int 
scale) { + conv2dk1_i8_scalar(input, kernels, output, input_width, input_channels, + output_channels, scale); +} + +#else // UINT8_ACT + +void conv2dk1_ui8(uint8_t *input, int8_t *kernels, uint8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale) { + conv2dk1_ui8_scalar(input, kernels, output, input_width, input_channels, + output_channels, scale); +} + +#endif // UINT8_ACT + +#else // Vector + +#ifdef INT8_ACT + +void conv2dk1_i8(int8_t *input, int8_t *kernels, uint8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale) { + conv2dk1_i8_vector(input, kernels, output, input_width, input_channels, + output_channels, scale); +} + +#else // UINT8_ACT + +void conv2dk1_ui8(uint8_t *input, int8_t *kernels, uint8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale) { + conv2dk1_ui8_vector(input, kernels, output, input_width, input_channels, + output_channels, scale); +} + +#endif // UINT8_ACT + +#endif // Vector + +} // extern "C" \ No newline at end of file diff --git a/aie_kernels/aie2/conv2dk1.h b/aie_kernels/aie2/conv2dk1.h new file mode 100755 index 0000000000..d3c405435e --- /dev/null +++ b/aie_kernels/aie2/conv2dk1.h @@ -0,0 +1,25 @@ +//===- conv2dk1.h -------------------------------------------------*- C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _CONV2DK1_H +#define _CONV2DK1_H + +extern "C" { +void conv2dk1_i8(int8_t *input, int8_t *kernels, uint8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale); + +void conv2dk1_ui8(uint8_t *input, int8_t *kernels, uint8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale); +} // extern "C" + +#endif diff --git a/aie_kernels/aie2/conv2dk1_i8.cc b/aie_kernels/aie2/conv2dk1_i8.cc new file mode 100644 index 0000000000..73a9d8ed12 --- /dev/null +++ b/aie_kernels/aie2/conv2dk1_i8.cc @@ -0,0 +1,224 @@ +//===- conv2dk1.cc -------------------------------------------------*- C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2022, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +// #define __AIENGINE__ 1 +#define __AIENGINE__ 2 +#define NOCPP +#define __AIEARCH__ 20 + +#include +#include +#include + +#include + +#define REL_WRITE 0 +#define REL_READ 1 + +#ifdef SCALAR + +const int32_t SMAX = 127; +const int32_t SMIN = 128; + +#ifdef INT8_ACT +//***************************************************************************** +// conv2d 1x1 - scalar +// act: int8, wts: int8, out: int8 +//***************************************************************************** +void conv2dk1_i8_scalar(int8_t *input, int8_t *kernels, int8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale) { + event0(); + + int x, ic, oc, ic8, oc8; + // scale=-17; + for (oc = 0; oc < output_channels / 8; oc++) { + for (x = 0; x < input_width; x++) { // col of output image + for (oc8 = 0; oc8 < 8; oc8++) { + int sum = 0; + int sum_srs = 0; + + for (ic = 0; ic < input_channels / 8; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + int val = input[(ic * input_width * 8) + (x * 8) + ic8]; + int k = kernels[(oc * (input_channels / 8) * 64) + (ic * 64) + + (ic8 * 8) + oc8]; + sum += val * k; + } + } + + // sum_srs=sum>>scale; + sum_srs = (sum + (1 << (scale - 1))) >> scale; + sum_srs = (sum_srs > SMAX) ? SMAX : (sum_srs < -SMIN) ? 
-SMIN : sum_srs; + // sum_srs = input[(oc*input_width*8) + (x*8) + oc8]; + output[(oc * input_width * 8) + (x * 8) + oc8] = sum_srs; + } + } + } + + event1(); +} +#endif // INT8_ACT + +#else // Vector + +#ifdef INT8_ACT + +//***************************************************************************** +// conv2d 1x1 - vector +// act: int8, wts: int8, out: uint8 +// +// Assume IC >= 16 as that gives ideal inner loop schedule +// +// TODO - Restricting input_width is mutiple of 32 +// Because each VMAC works on 4 inputs at a time and we store intermediate +// results in 8 accumulators, having input_width be a multiple of 4*8=32 is +// ideal. However, we should be able to support input_width that is only a +// multiple of 4 but there is some strange scheduling happening now so for +// now, we do not. +//***************************************************************************** +void conv2dk1_i8_vector(int8_t *input, int8_t *kernels, int8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale) { + event0(); + + using MMUL4x8x8 = aie::mmul<4, 8, 8, int8, int8>; + ::aie::set_saturation( + aie::saturation_mode::saturate); // Needed to saturate properly to uint8 + ::aie::set_rounding(aie::rounding_mode::symmetric_inf); // Needed to saturate + // properly to uint8 + + int8_t *restrict out_ptr = output; + + const int scaleT = scale; + + MMUL4x8x8 acc_tmp[8]; + for (int x = 0; x < 8; x++) { + acc_tmp[x] = aie::zeros(); + } + + // TODO Keeping this variable gives a wrong behavior and bad schedule! 
+ const int iw = input_width; + const int iw_32 = (input_width / 4) / 8; + + // const int iw_32_rem = (input_width / 4) % 8; + // const int iw_32_rem = (32 / 4) % 8; + assert((input_width / 4) % 8 == 0); + const int iw_32_rem = 0; // TODO - See restriction + + assert((input_channels / 8) > 2); // Assume IC >= 16 + + if (iw_32 > 0) { + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int iw_32c = 0; iw_32c < iw_32; iw_32c++) { + for (int ic = 0; ic < (input_channels / 8); ic++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x = 0; x < 8; x++) { + aie::vector in_a = aie::load_v<32>(input); + input += 32; // act oc0..3(ic0..7) + acc_tmp[x].mac(in_a, in_b); + } + input += (iw * 8) - 256; // Move to next ic/8 position + } + // input ptr just moves to next section + for (int xx = 0; xx < 8; xx++) { + aie::vector o1 = acc_tmp[xx].to_vector(scaleT); + aie::store_v(out_ptr, o1); + out_ptr += 32; + acc_tmp[xx] = aie::zeros(); + } + input -= ((input_channels / 8) * iw * 8) - + 256; // reset to next input_width/32 block + kernels -= + (input_channels / 8) * 64; // reset kernel back to beginning of ic/8 + } + input -= (iw_32) * 256; // 8*32, reset beginning of input ptr + kernels += (input_channels / 8) * 64; // move to next oc/8 weights + out_ptr += (iw_32_rem * + 32); // move to next oc/8 (skip remainder section if present) + } + + } // if(iw_32 > 0) { + + if (iw_32_rem > 0) { + + const int ocs = output_channels; + const int ics = input_channels; + + for (int oc = 0; oc < (ocs / 8); oc++) { + for (int ic = 0; ic < (ics / 8); ic++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x = 0; x < iw_32_rem; x++) { + aie::vector in_a = aie::load_v<32>(input); + input += 32; // act oc0..3(ic0..7) + acc_tmp[x].mac(in_a, in_b); + } + input += (iw * 8) - (iw_32_rem * 
32); // Move to next ic/8 position + } + // input ptr just moves to next section + for (int xx = 0; xx < iw_32_rem; xx++) { + aie::vector o1 = acc_tmp[xx].to_vector(scaleT); + aie::store_v(out_ptr, o1); + out_ptr += 32; + acc_tmp[xx] = aie::zeros(); + } + // input -= ((ics-1)/8)*(iw*8)+(iw_32_rem*32); // reset to beginning of + // input ptr for remainder + input -= 448; // reset to beginning of input ptr for remainder + // kernel ptr already at next oc/8 + out_ptr += (iw * 8) - + (iw_32_rem * + 32); // move to next oc/8 (skip remainder section if present) + } + + } // if(iw_32_rem > 0) + + event1(); +} +#endif // INT8_ACT +#endif // Vector + +//***************************************************************************** +// conv2d 1x1 wrappers +//***************************************************************************** +extern "C" { + +#ifdef SCALAR + +#ifdef INT8_ACT + +void conv2dk1_i8(int8_t *input, int8_t *kernels, int8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale) { + conv2dk1_i8_scalar(input, kernels, output, input_width, input_channels, + output_channels, scale); +} +#endif // INT8_ACT +#else // Vector + +#ifdef INT8_ACT + +void conv2dk1_i8(int8_t *input, int8_t *kernels, int8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale) { + conv2dk1_i8_vector(input, kernels, output, input_width, input_channels, + output_channels, scale); +} +#endif // INT8_ACT +#endif // Vector +} // extern "C" \ No newline at end of file diff --git a/aie_kernels/aie2/conv2dk1_i8.h b/aie_kernels/aie2/conv2dk1_i8.h new file mode 100644 index 0000000000..98925f8a86 --- /dev/null +++ b/aie_kernels/aie2/conv2dk1_i8.h @@ -0,0 +1,22 @@ +//===- conv2dk1.h -------------------------------------------------*- C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2022, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +#ifndef _CONV2DK1_H +#define _CONV2DK1_H + +extern "C" { +void conv2dk1_i8(int8_t *input, int8_t *kernels, int8_t *output, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale); + +} // extern "C" + +#endif \ No newline at end of file diff --git a/aie_kernels/aie2/conv2dk1_skip.cc b/aie_kernels/aie2/conv2dk1_skip.cc new file mode 100755 index 0000000000..feaa95333b --- /dev/null +++ b/aie_kernels/aie2/conv2dk1_skip.cc @@ -0,0 +1,766 @@ +//===- conv2dk1_skip.cc -------------------------------------------------*- C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +// #define __AIENGINE__ 1 +#define __AIENGINE__ 2 +#define NOCPP +#define __AIEARCH__ 20 + +#include +#include +#include + +#define REL_WRITE 0 +#define REL_READ 1 + +#include + +#ifdef SCALAR + +const int32_t MIN = 128; +const int32_t MAX = 127; +const int32_t UMAX = 255; + +#ifdef INT8_ACT + +//***************************************************************************** +// conv2d 1x1 skip - scalar +// act: uint8, wts: int8, skip: int8, out: uint8 +//***************************************************************************** +void conv2dk1_skip_i8_scalar(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, int8_t *skip, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, const int scale, + const int skip_scale) { + event0(); + + int x, ic, ic2, oc, oc8, ic8, ic8b; + + const int scaleT = scale; + const int skip_scaleT = skip_scale; + // const int scaleT = 10; + // const int skip_scaleT = 0; + + for (oc = 0; oc < output_channels / 8; oc++) { + for (oc8 = 0; oc8 < 8; oc8++) { + for (x = 0; x < input_width; x++) { // col of output image + int sum = 0; + int sum_srs = 0; + int64_t skip_sum = 0; + int skip_sum_srs_final = 0; + int skip_sum_srs_final_out = 0; + int skip_temp = 0; + for (ic = 0; ic < input_channels / 16; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + // int val = input0[ic * input_width + x]; + int val = input0[(ic * input_width * 8) + (x * 8) + ic8]; + // int k = kernels[oc * input_channels + ic]; + int k = kernels[(oc * (input_channels / 8) * 64) + (ic * 64) + + (ic8 * 8) + oc8]; + sum += val * k; + } + } + // for (ic2 = input_channels/16; ic2 < input_channels/8; ic2++) { + for (ic2 = 0; ic2 < input_channels / 16; ic2++) { + for (ic8b = 0; ic8b < 8; ic8b++) { + // int val2 = input1[ic2 * input_width + x]; + int val2 = input1[(ic2 * input_width * 8) + (x * 8) + + ic8b]; // TODO ic2 should be shifted? 
+ // int k2 = kernels[oc * input_channels + ic2]; + int k2 = kernels[(oc * (input_channels / 8) * 64) + + ((ic2 + (input_channels / 16)) * 64) + (ic8b * 8) + + oc8]; + sum += val2 * k2; + } + } + // scale for convolution + sum_srs = (sum + (1 << (scaleT - 1))) >> scaleT; + sum_srs = (sum_srs > MAX) ? MAX + : (sum_srs < -MIN) ? -MIN + : sum_srs; // clip + // sum_srs = (sum_srs > UMAX) ? UMAX : (sum_srs < 0) ? 0 : sum_srs; + // //clip + + // scale for residual + // skip_temp=skip[oc * input_width + x]; + skip_temp = skip[(oc * input_width * 8) + (x * 8) + oc8]; + skip_sum = sum_srs + skip_temp; + // skip_sum= sum_srs; + + skip_sum_srs_final = + (skip_sum + (1 << (skip_scaleT - 1))) >> skip_scaleT; + skip_sum_srs_final_out = (skip_sum_srs_final > UMAX) ? UMAX + : (skip_sum_srs_final < 0) + ? 0 + : skip_sum_srs_final; // clip + + // output[oc * input_width + x] = skip_sum_srs_final_out; + output[(oc * input_width * 8) + (x * 8) + oc8] = skip_sum_srs_final_out; + + // output[oc * input_width + x] = sum; + // output[oc * input_width + x] = sum+skip[oc * input_width + x]; + } + } + } + + // for (oc = 0; oc < output_channels; ++oc) { + // for (x = 0; x < input_width; ++x) { + // output[oc * input_width + x]=skip[oc * input_width + x];} + // } + + event1(); +} + +#else // UINT8_ACT + +//***************************************************************************** +// conv2d 1x1 skip - scalar +// act: uint8, wts: int8, skip: uint8, out: uint8 +//***************************************************************************** +void conv2dk1_skip_ui8_scalar(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, uint8_t *skip, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, const int scale, + const int skip_scale) { + event0(); + + int x, ic, ic2, oc, oc8, ic8, ic8b; + + const int scaleT = scale; + const int skip_scaleT = skip_scale; + // const int scaleT = 10; + // const int skip_scaleT = 0; + + for (oc = 0; oc < 
output_channels / 8; oc++) { + for (oc8 = 0; oc8 < 8; oc8++) { + for (x = 0; x < input_width; x++) { // col of output image + int sum = 0; + int sum_srs = 0; + int skip_sum = 0; + int skip_sum_srs_final = 0; + int skip_sum_srs_final_out = 0; + uint8_t skip_temp = 0; + for (ic = 0; ic < input_channels / 16; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + // int val = input0[ic * input_width + x]; + uint8_t val = input0[(ic * input_width * 8) + (x * 8) + ic8]; + // int k = kernels[oc * input_channels + ic]; + int k = kernels[(oc * (input_channels / 8) * 64) + (ic * 64) + + (ic8 * 8) + oc8]; + sum += val * k; + } + } + for (ic2 = 0; ic2 < input_channels / 16; ic2++) { + for (ic8b = 0; ic8b < 8; ic8b++) { + // int val2 = input1[ic2 * input_width + x]; + uint8_t val2 = input1[(ic2 * input_width * 8) + (x * 8) + + ic8b]; // TODO ic2 should be shifted? + // int k2 = kernels[oc * input_channels + ic2]; + int k2 = kernels[(oc * (input_channels / 8) * 64) + + ((ic2 + (input_channels / 16)) * 64) + (ic8b * 8) + + oc8]; + sum += val2 * k2; + } + } + // scale for convolution + sum_srs = (sum + (1 << (scaleT - 1))) >> scaleT; + sum_srs = (sum_srs > MAX) ? MAX + : (sum_srs < -MIN) ? -MIN + : sum_srs; // clip + + // scale for residual + skip_temp = skip[(oc * input_width * 8) + (x * 8) + oc8]; + skip_sum = sum_srs + skip_temp; + + // skip_sum= sum_srs; + + skip_sum_srs_final = + (skip_sum + (1 << (skip_scaleT - 1))) >> skip_scaleT; + skip_sum_srs_final_out = (skip_sum_srs_final > UMAX) ? UMAX + : (skip_sum_srs_final < 0) + ? 
0 + : skip_sum_srs_final; // clip + + // output[oc * input_width + x] = skip_sum_srs_final_out; + output[(oc * input_width * 8) + (x * 8) + oc8] = skip_sum_srs_final_out; + + // output[oc * input_width + x] = sum; + // output[oc * input_width + x] = sum+skip[oc * input_width + x]; + } + } + } + + // for (oc = 0; oc < output_channels; ++oc) { + // for (x = 0; x < input_width; ++x) { + // output[oc * input_width + x]=skip[oc * input_width + x];} + // } + + event1(); +} + +#endif // UINT8_ACT + +#else // Vector + +#ifdef INT8_ACT + +//***************************************************************************** +// conv2d 1x1 skip - vector +// act: uint8, wts: int8, skip: int8, out: uint8 +// +// Assume IC >= 16 as that gives ideal inner loop schedule +// +// TODO - Restricting input_width is mutiple of 32 +// Because each VMAC works on 4 inputs at a time and we store intermediate +// results in 8 accumulators, having input_width be a multiple of 4*8=32 is +// ideal. However, we should be able to support input_width that is only a +// multiple of 4 but there is some strange scheduling happening now so for +// now, we do not. 
+//***************************************************************************** +void conv2dk1_skip_i8_vector(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, int8_t *skip, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, const int scale, + const int skip_scale) { + event0(); + + using MMUL4x8x8 = aie::mmul<4, 8, 8, uint8, int8>; + ::aie::set_saturation( + aie::saturation_mode::saturate); // Needed to saturate properly to uint8 + ::aie::set_rounding( + aie::rounding_mode::positive_inf); // Needed to saturate properly to uint8 + + uint8_t *restrict out_ptr = output; + int8_t *i_out_ptr = (int8_t *)output; + int8_t *restrict skip_ptr = skip; + + const int scaleT = scale; + const int skip_scaleT = skip_scale; + + constexpr int NUM_ACC = 8; + + const int iw_32 = (input_width / 4) / 8; + const int iw = input_width; + // const int iw_32_rem = (input_width / 4) % 8; + assert((input_width / 4) % 8 == 0); + const int iw_32_rem = 0; // TODO - See restriction + + assert((input_channels / 8) > 2); // Assume IC >= 16 + + int input_offset1 = 0; + int input_offset2 = 0; + + if (iw_32 > 0) { + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int x = 0; x < iw_32; x++) { + MMUL4x8x8 acc_tmp[NUM_ACC]; + for (int i = 0; i < NUM_ACC; i++) { + acc_tmp[i] = aie::zeros(); + } + for (int ic = 0; ic < (input_channels / 16); ic++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x8 = 0; x8 < NUM_ACC; x8++) { + aie::vector in_a = + aie::load_v<32>(input0 + input_offset1); + input_offset1 += 32; // act oc0..3(ic0..7) + acc_tmp[x8].mac(in_a, in_b); + } + input_offset1 += + (iw * 8) - + 256; // Move to next ic/8 position. 
256 = 32 input * 8 ic + } + for (int ic = 0; ic < (input_channels / 16); ic++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x8 = 0; x8 < NUM_ACC; x8++) { + aie::vector in_a = + aie::load_v<32>(input1 + input_offset2); + input_offset2 += 32; // act oc0..3(ic0..7) + acc_tmp[x8].mac(in_a, in_b); + } + input_offset2 += + (iw * 8) - + 256; // Move to next ic/8 position. 256 = 32 input * 8 ic + } + // input ptr just moves to next section + for (int x8 = 0; x8 < NUM_ACC; x8++) { + aie::vector skip1 = aie::load_v<32>(skip_ptr); + skip_ptr += 32; + + aie::accum accj; + accj.from_vector(skip1, 0); + accj = aie::add(accj, acc_tmp[x8].to_vector(scaleT)); + // accj = aie::mac(accj, acc_tmp[x8].to_vector(scaleT), + // (uint8_t)1); + aie::vector o1 = accj.to_vector(skip_scaleT); + aie::store_v(out_ptr, o1); + out_ptr += 32; + // acc_tmp[x8] = aie::zeros(); + } + input_offset1 -= + ((input_channels / 16) * iw * 8) - + 256; // reset to next input_width/32 block. 256 = 32 input * 8 ic + input_offset2 -= + ((input_channels / 16) * iw * 8) - + 256; // reset to next input_width/32 block. 
256 = 32 input * 8 ic + kernels -= + (input_channels / 8) * 64; // reset kernel back to beginning of ic/8 + } // for(int x=0; x skip1 = aie::load_v<32>(skip_ptr); + // skip_ptr += 32; aie::vector skip1 = + // aie::load_v<32>(skip_ptr); skip_ptr += 32; + // // aie::vector tmp = aie::load_v<32>(out_ptr); + // aie::vector tmp = aie::load_v<32>(i_out_ptr); + // i_out_ptr += 32; aie::accum accj; + // accj.from_vector(skip1,0); + // accj = aie::mac(accj, tmp, (uint8_t)1); + // aie::vector o3 = + // accj.to_vector(skip_scaleT); aie::store_v(out_ptr, + // o3); out_ptr += 32; + // } + // } + // out_ptr += (iw_32_rem*32); + // skip_ptr += (iw_32_rem*32); + // } + + out_ptr -= (output_channels - 1) * iw + (iw_32_rem * 32); + skip_ptr -= (output_channels - 1) * iw + (iw_32_rem * 32); + + } // if(iw_32 > 0) { + + // **TODO** Move out_ptr and skip_ptr back to first oc/8 rem location + + // if(iw_32_rem > 0) { + + // const int ocs = output_channels; + // const int ics = input_channels; + + // input_offset1 = 0; // TODO need to offset this to ic_32_rem position + // input_offset2 = 0; // TODO need to offset this to ic_32_rem position + + // for(int oc=0; oc<(ocs/8); oc++) { + // for(int ic=0; ic<(ics/16); ic++) { + // // For ic = oc = 8, we can load all the weights in 1x 512b vec reg + // (2x 256b loads) + // // For ic > 8, we would load the next 64 weights that are + // ic8..15(oc0..7) + // // For oc > 8, we would load the next 64 weights after all the ic + // weights {OC}{IC}{IC8}{OC8} aie::vector in_b = + // aie::load_v<64>(kernels); kernels+=64; // wts ic0..7(oc0..7) + + // for(int x=0; x in_a = + // aie::load_v<32>(input0+input_offset1); input_offset1 += 32; // + // act oc0..3(ic0..7) acc_tmp[x].mac(in_a, in_b); + // } + // input_offset1 += (iw*8)-(iw_32_rem*32); // Move to next ic/8 + // position, TODO -(iw_32_rem*8)?? 
+ // } + // for(int ic=0; ic<(ics/16); ic++) { + // // For ic = oc = 8, we can load all the weights in 1x 512b vec reg + // (2x 256b loads) + // // For ic > 8, we would load the next 64 weights that are + // ic8..15(oc0..7) + // // For oc > 8, we would load the next 64 weights after all the ic + // weights {OC}{IC}{IC8}{OC8} aie::vector in_b = + // aie::load_v<64>(kernels); kernels+=64; // wts ic0..7(oc0..7) + + // for(int x=0; x in_a = + // aie::load_v<32>(input1+input_offset2); input_offset2 += 32; // + // act oc0..3(ic0..7) acc_tmp[x].mac(in_a, in_b); + // } + // input_offset2 += (iw*8)-(iw_32_rem*32); // Move to next ic/8 + // position + // } + // // input ptr just moves to next section + // for(int xx=0; xx o1 = acc_tmp[xx].to_vector(scaleT); + // aie::vector o1 = acc_tmp[xx].to_vector(scaleT); + // // aie::store_v(out_ptr, o1); out_ptr += 32; + // aie::store_v(i_out_ptr, o1); i_out_ptr += 32; + // acc_tmp[xx] = aie::zeros(); + // } + // // input -= ((ics-1)/8)*(iw*8)+(iw_32_rem*32); // reset to beginning + // of input ptr for remainder input_offset1 -= 448; // reset to + // beginning of input ptr for remainder input_offset2 -= 448; // reset + // to beginning of input ptr for remainder + // // kernel ptr already at next oc/8 + // i_out_ptr += (iw*8)-(iw_32_rem*32); // move to next oc/8 + // (skip remainder section if present) + // } + + // i_out_ptr -= output_channels*iw; + + // for(int oc=0; oc<(output_channels/8); oc++) { + // for(int x8=0; x8 skip1 = aie::load_v<32>(skip_ptr); skip_ptr += + // 32; aie::vector tmp = aie::load_v<32>(i_out_ptr); + // aie::accum accj; + // accj.from_vector(skip1,0); + // accj = aie::mac(accj, tmp, (uint8_t)1); + // aie::vector o3 = accj.to_vector(skip_scaleT); + // aie::store_v(out_ptr, o3); out_ptr += 32; + // } + // out_ptr += (iw*8)-(iw_32_rem*32); + // skip_ptr += (iw*8)-(iw_32_rem*32); + // } + + // } // if(iw_32_rem > 0) + + event1(); +} + +#else // UINT8_ACT + 
+//***************************************************************************** +// conv2d 1x1 skip - vector +// act: uint8, wts: int8, skip: uint8, out: uint8 +// +// Assume IC >= 16 as that gives ideal inner loop schedule +// +// TODO - Restricting input_width is mutiple of 32 +// Because each VMAC works on 4 inputs at a time and we store intermediate +// results in 8 accumulators, having input_width be a multiple of 4*8=32 is +// ideal. However, we should be able to support input_width that is only a +// multiple of 4 but there is some strange scheduling happening now so for +// now, we do not. +//***************************************************************************** +void conv2dk1_skip_ui8_vector(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, uint8_t *skip, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, const int scale, + const int skip_scale) { + event0(); + + using MMUL4x8x8 = aie::mmul<4, 8, 8, uint8, int8>; + ::aie::set_saturation( + aie::saturation_mode::saturate); // Needed to saturate properly to uint8 + ::aie::set_rounding( + aie::rounding_mode::positive_inf); // Needed to saturate properly to uint8 + + uint8_t *restrict out_ptr = output; + int8_t *i_out_ptr = (int8_t *)output; + uint8_t *restrict skip_ptr = skip; + + const int scaleT = scale; + const int skip_scaleT = skip_scale; + + constexpr int NUM_ACC = 8; + + const int iw_32 = (input_width / 4) / 8; + const int iw = input_width; + // const int iw_32_rem = (input_width / 4) % 8; + assert((input_width / 4) % 8 == 0); + const int iw_32_rem = 0; // TODO - See restriction + + assert((input_channels / 8) > 2); // Assume IC >= 16 + + int input_offset1 = 0; + int input_offset2 = 0; + + if (iw_32 > 0) { + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int x = 0; x < iw_32; x++) { + MMUL4x8x8 acc_tmp[NUM_ACC]; + for (int i = 0; i < NUM_ACC; i++) { + acc_tmp[i] = aie::zeros(); + } + for (int ic = 0; ic < 
(input_channels / 16); ic++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x8 = 0; x8 < NUM_ACC; x8++) { + aie::vector in_a = + aie::load_v<32>(input0 + input_offset1); + input_offset1 += 32; // act oc0..3(ic0..7) + acc_tmp[x8].mac(in_a, in_b); + } + input_offset1 += + (iw * 8) - + 256; // Move to next ic/8 position. 256 = 32 input * 8 ic + } + for (int ic = 0; ic < (input_channels / 16); ic++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x8 = 0; x8 < NUM_ACC; x8++) { + aie::vector in_a = + aie::load_v<32>(input1 + input_offset2); + input_offset2 += 32; // act oc0..3(ic0..7) + acc_tmp[x8].mac(in_a, in_b); + } + input_offset2 += + (iw * 8) - + 256; // Move to next ic/8 position. 256 = 32 input * 8 ic + } + // input ptr just moves to next section + for (int x8 = 0; x8 < NUM_ACC; x8++) { + aie::vector skip1 = aie::load_v<32>(skip_ptr); + skip_ptr += 32; + + aie::accum accj; + accj.from_vector(skip1, 0); + accj = aie::add(accj, acc_tmp[x8].to_vector(scaleT)); + // accj = aie::mac(accj, acc_tmp[x8].to_vector(scaleT), + // (uint8_t)1); + aie::vector o1 = accj.to_vector(skip_scaleT); + aie::store_v(out_ptr, o1); + out_ptr += 32; + // acc_tmp[x8] = aie::zeros(); + } + input_offset1 -= + ((input_channels / 16) * iw * 8) - + 256; // reset to next input_width/32 block. 256 = 32 input * 8 ic + input_offset2 -= + ((input_channels / 16) * iw * 8) - + 256; // reset to next input_width/32 block. 
256 = 32 input * 8 ic + kernels -= + (input_channels / 8) * 64; // reset kernel back to beginning of ic/8 + } // for(int x=0; x skip1 = aie::load_v<32>(skip_ptr); + // skip_ptr += 32; aie::vector skip1 = + // aie::load_v<32>(skip_ptr); skip_ptr += 32; + // // aie::vector tmp = aie::load_v<32>(out_ptr); + // aie::vector tmp = aie::load_v<32>(i_out_ptr); + // i_out_ptr += 32; aie::accum accj; + // accj.from_vector(skip1,0); + // accj = aie::mac(accj, tmp, (uint8_t)1); + // aie::vector o3 = + // accj.to_vector(skip_scaleT); aie::store_v(out_ptr, + // o3); out_ptr += 32; + // } + // } + // out_ptr += (iw_32_rem*32); + // skip_ptr += (iw_32_rem*32); + // } + + out_ptr -= (output_channels - 1) * iw + (iw_32_rem * 32); + skip_ptr -= (output_channels - 1) * iw + (iw_32_rem * 32); + + } // if(iw_32 > 0) { + + // **TODO** Move out_ptr and skip_ptr back to first oc/8 rem location + + // if(iw_32_rem > 0) { + + // const int ocs = output_channels; + // const int ics = input_channels; + + // input_offset1 = 0; // TODO need to offset this to ic_32_rem position + // input_offset2 = 0; // TODO need to offset this to ic_32_rem position + + // for(int oc=0; oc<(ocs/8); oc++) { + // for(int ic=0; ic<(ics/16); ic++) { + // // For ic = oc = 8, we can load all the weights in 1x 512b vec reg + // (2x 256b loads) + // // For ic > 8, we would load the next 64 weights that are + // ic8..15(oc0..7) + // // For oc > 8, we would load the next 64 weights after all the ic + // weights {OC}{IC}{IC8}{OC8} aie::vector in_b = + // aie::load_v<64>(kernels); kernels+=64; // wts ic0..7(oc0..7) + + // for(int x=0; x in_a = + // aie::load_v<32>(input0+input_offset1); input_offset1 += 32; // + // act oc0..3(ic0..7) acc_tmp[x].mac(in_a, in_b); + // } + // input_offset1 += (iw*8)-(iw_32_rem*32); // Move to next ic/8 + // position, TODO -(iw_32_rem*8)?? 
+ // } + // for(int ic=0; ic<(ics/16); ic++) { + // // For ic = oc = 8, we can load all the weights in 1x 512b vec reg + // (2x 256b loads) + // // For ic > 8, we would load the next 64 weights that are + // ic8..15(oc0..7) + // // For oc > 8, we would load the next 64 weights after all the ic + // weights {OC}{IC}{IC8}{OC8} aie::vector in_b = + // aie::load_v<64>(kernels); kernels+=64; // wts ic0..7(oc0..7) + + // for(int x=0; x in_a = + // aie::load_v<32>(input1+input_offset2); input_offset2 += 32; // + // act oc0..3(ic0..7) acc_tmp[x].mac(in_a, in_b); + // } + // input_offset2 += (iw*8)-(iw_32_rem*32); // Move to next ic/8 + // position + // } + // // input ptr just moves to next section + // for(int xx=0; xx o1 = acc_tmp[xx].to_vector(scaleT); + // aie::vector o1 = acc_tmp[xx].to_vector(scaleT); + // // aie::store_v(out_ptr, o1); out_ptr += 32; + // aie::store_v(i_out_ptr, o1); i_out_ptr += 32; + // acc_tmp[xx] = aie::zeros(); + // } + // // input -= ((ics-1)/8)*(iw*8)+(iw_32_rem*32); // reset to beginning + // of input ptr for remainder input_offset1 -= 448; // reset to + // beginning of input ptr for remainder input_offset2 -= 448; // reset + // to beginning of input ptr for remainder + // // kernel ptr already at next oc/8 + // i_out_ptr += (iw*8)-(iw_32_rem*32); // move to next oc/8 + // (skip remainder section if present) + // } + + // i_out_ptr -= output_channels*iw; + + // for(int oc=0; oc<(output_channels/8); oc++) { + // for(int x8=0; x8 skip1 = aie::load_v<32>(skip_ptr); skip_ptr += + // 32; aie::vector tmp = aie::load_v<32>(i_out_ptr); + // aie::accum accj; + // accj.from_vector(skip1,0); + // accj = aie::mac(accj, tmp, (uint8_t)1); + // aie::vector o3 = accj.to_vector(skip_scaleT); + // aie::store_v(out_ptr, o3); out_ptr += 32; + // } + // out_ptr += (iw*8)-(iw_32_rem*32); + // skip_ptr += (iw*8)-(iw_32_rem*32); + // } + + // } // if(iw_32_rem > 0) + + event1(); +} + +#endif // UINT8_ACT + +#endif // Vector + 
+//***************************************************************************** +// conv2d 1x1 skip wrappers +//***************************************************************************** +extern "C" { + +#ifdef SCALAR + +#ifdef INT8_ACT + +void conv2dk1_skip_i8(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, int8_t *skip, const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, const int scale, + const int skip_scale) { + conv2dk1_skip_i8_scalar(input0, input1, kernels, output, skip, input_width, + input_channels, output_channels, scale, skip_scale); +} + +#else // UINT8_ACT + +void conv2dk1_skip_ui8(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, uint8_t *skip, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale, + const int skip_scale) { + conv2dk1_skip_ui8_scalar(input0, input1, kernels, output, skip, input_width, + input_channels, output_channels, scale, skip_scale); +} + +#endif // UINT8_ACT + +#else // Vector + +#ifdef INT8_ACT + +void conv2dk1_skip_i8(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, int8_t *skip, const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, const int scale, + const int skip_scale) { + conv2dk1_skip_i8_vector(input0, input1, kernels, output, skip, input_width, + input_channels, output_channels, scale, skip_scale); +} + +#else // UINT8_ACT + +void conv2dk1_skip_ui8(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, uint8_t *skip, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale, + const int skip_scale) { + conv2dk1_skip_ui8_vector(input0, input1, kernels, output, skip, input_width, + input_channels, output_channels, scale, skip_scale); +} + +#endif // UINT8_ACT + +#endif // Vector + +} // extern "C" \ No newline at end of file diff --git 
a/aie_kernels/aie2/conv2dk1_skip.h b/aie_kernels/aie2/conv2dk1_skip.h new file mode 100755 index 0000000000..8daa62e507 --- /dev/null +++ b/aie_kernels/aie2/conv2dk1_skip.h @@ -0,0 +1,31 @@ +//===- conv2dk1_skip.h -------------------------------------------------*- C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +#ifndef _CONV2DK1_SKIP_H +#define _CONV2DK1_SKIP_H + +extern "C" { + +void conv2dk1_skip_i8(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, int8_t *skip, const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, const int scale, + const int skip_scale); + +void conv2dk1_skip_ui8(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, uint8_t *skip, + const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int scale, + const int skip_scale); + +} // extern "C" + +#endif diff --git a/aie_kernels/aie2/conv2dk1_skip_init.cc b/aie_kernels/aie2/conv2dk1_skip_init.cc new file mode 100755 index 0000000000..591377479f --- /dev/null +++ b/aie_kernels/aie2/conv2dk1_skip_init.cc @@ -0,0 +1,934 @@ +//===- conv2dk1_skip_init.cc -------------------------------------------------*- +// C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +// #define __AIENGINE__ 1 +#define __AIENGINE__ 2 +#define NOCPP +#define __AIEARCH__ 20 + +#include +#include +#include + +#define REL_WRITE 0 +#define REL_READ 1 + +#include + +#ifdef SCALAR + +const int32_t MIN = 128; +const int32_t MAX = 127; +const int32_t UMAX = 255; + +#ifdef INT8_ACT + +//***************************************************************************** +// conv2d 1x1 skip init - scalar +// act: uint8, wts: int8, skip: int8, out: uint8 +//***************************************************************************** +// NOTE: Assumes input_channels >= 16 +void conv2dk1_skip_init_i8_scalar( + uint8_t *input0, uint8_t *input1, int8_t *kernels, uint8_t *output, + int8_t *skip, const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int32_t input_channels_skip, + const int scale, const int skip_scale, const int scale_skip_conv) { + event0(); + + int x, ic, ic2, ic3, oc, oc8, ic8, ic8b, ic8c; + + const int scaleT = scale; + const int skip_scaleT = skip_scale; + const int skip_scaleT_conv = scale_skip_conv; + const int wts_offset = output_channels * input_channels; + + // const int scaleT = 10; + // const int skip_scaleT = 0; + + for (oc = 0; oc < output_channels / 8; oc++) { + for (oc8 = 0; oc8 < 8; oc8++) { + for (x = 0; x < input_width; x++) { // col of output image + int sum = 0; + int sum_srs = 0; + int sum_skip_conv = 0; + int sum_skip_conv_srs = 0; + int64_t skip_sum = 0; + int skip_sum_srs_final = 0; + int skip_sum_srs_final_out = 0; + int skip_temp = 0; + for (ic = 0; ic < input_channels / 16; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + // int val = input0[ic * input_width + x]; + int val = input0[(ic * input_width * 8) + (x * 8) + ic8]; + // int k = kernels[oc * input_channels + ic]; + int k = kernels[(oc * (input_channels / 8) * 64) + (ic * 64) + + (ic8 * 8) + oc8]; + sum += val * k; + } + } + // for (ic2 = 
input_channels/16; ic2 < input_channels/8; ic2++) { + for (ic2 = 0; ic2 < input_channels / 16; ic2++) { + for (ic8b = 0; ic8b < 8; ic8b++) { + // int val2 = input1[ic2 * input_width + x]; + int val2 = input1[(ic2 * input_width * 8) + (x * 8) + + ic8b]; // TODO ic2 should be shifted? + // int k2 = kernels[oc * input_channels + ic2]; + int k2 = kernels[(oc * (input_channels / 8) * 64) + + ((ic2 + (input_channels / 16)) * 64) + (ic8b * 8) + + oc8]; + sum += val2 * k2; + } + } + // scale for convolution + sum_srs = (sum + (1 << (scaleT - 1))) >> scaleT; + sum_srs = (sum_srs > MAX) ? MAX + : (sum_srs < -MIN) ? -MIN + : sum_srs; // clip + // sum_srs = (sum_srs > UMAX) ? UMAX : (sum_srs < 0) ? 0 : sum_srs; + // //clip + // ******************************************************************************************************************** + // skip convolution + for (ic3 = 0; ic3 < input_channels_skip / 8; ic3++) { + for (ic8c = 0; ic8c < 8; ic8c++) { + int val3 = skip[(ic3 * input_width * 8) + (x * 8) + ic8c]; + int k3 = kernels[(oc * (input_channels_skip / 8) * 64) + + (ic3 * 64) + (ic8c * 8) + oc8 + wts_offset]; + sum_skip_conv += val3 * k3; + } + } + sum_skip_conv_srs = + (sum_skip_conv + (1 << (skip_scaleT_conv - 1))) >> skip_scaleT_conv; + sum_skip_conv_srs = (sum_skip_conv_srs > MAX) ? MAX + : (sum_skip_conv_srs < -MIN) ? -MIN + : sum_skip_conv_srs; + // ******************************************************************************************************************** + // scale for residual + // skip_temp=skip[oc * input_width + x]; + // skip_temp=skip[(oc*input_width*8) + (x*8) + oc8] ; + skip_temp = sum_skip_conv_srs; + skip_sum = sum_srs + skip_temp; + skip_sum_srs_final = + (skip_sum + (1 << (skip_scaleT - 1))) >> skip_scaleT; + skip_sum_srs_final_out = (skip_sum_srs_final > UMAX) ? UMAX + : (skip_sum_srs_final < 0) + ? 
0 + : skip_sum_srs_final; // clip + + // output[oc * input_width + x] = skip_sum_srs_final_out; + output[(oc * input_width * 8) + (x * 8) + oc8] = skip_sum_srs_final_out; + + // output[oc * input_width + x] = sum; + // output[oc * input_width + x] = sum+skip[oc * input_width + x]; + } + } + } + + // for (oc = 0; oc < output_channels; ++oc) { + // for (x = 0; x < input_width; ++x) { + // output[oc * input_width + x]=skip[oc * input_width + x];} + // } + + event1(); +} + +#else // UINT8_ACT + +//***************************************************************************** +// conv2d 1x1 skip init - scalar +// act: uint8, wts: int8, skip: uint8, out: uint8 +// +// NOTE: TODO Currently just a copy of the i8 code. No real differences +//***************************************************************************** +void conv2dk1_skip_init_ui8_scalar( + uint8_t *input0, uint8_t *input1, int8_t *kernels, uint8_t *output, + uint8_t *skip, const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int32_t input_channels_skip, + const int scale, const int skip_scale, const int scale_skip_conv) { + event0(); + + int x, ic, ic2, ic3, oc, oc8, ic8, ic8b, ic8c; + + const int scaleT = scale; + const int skip_scaleT = skip_scale; + const int skip_scaleT_conv = scale_skip_conv; + const int wts_offset = output_channels * input_channels; + + // const int scaleT = 10; + // const int skip_scaleT = 0; + + for (oc = 0; oc < output_channels / 8; oc++) { + for (oc8 = 0; oc8 < 8; oc8++) { + for (x = 0; x < input_width; x++) { // col of output image + int sum = 0; + int sum_srs = 0; + int sum_skip_conv = 0; + int sum_skip_conv_srs = 0; + int64_t skip_sum = 0; + int skip_sum_srs_final = 0; + int skip_sum_srs_final_out = 0; + int skip_temp = 0; + for (ic = 0; ic < input_channels / 16; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + // int val = input0[ic * input_width + x]; + int val = input0[(ic * input_width * 8) + (x * 8) + ic8]; + // int k = kernels[oc * 
input_channels + ic]; + int k = kernels[(oc * (input_channels / 8) * 64) + (ic * 64) + + (ic8 * 8) + oc8]; + sum += val * k; + } + } + // for (ic2 = input_channels/16; ic2 < input_channels/8; ic2++) { + for (ic2 = 0; ic2 < input_channels / 16; ic2++) { + for (ic8b = 0; ic8b < 8; ic8b++) { + // int val2 = input1[ic2 * input_width + x]; + int val2 = input1[(ic2 * input_width * 8) + (x * 8) + + ic8b]; // TODO ic2 should be shifted? + // int k2 = kernels[oc * input_channels + ic2]; + int k2 = kernels[(oc * (input_channels / 8) * 64) + + ((ic2 + (input_channels / 16)) * 64) + (ic8b * 8) + + oc8]; + sum += val2 * k2; + } + } + // scale for convolution + sum_srs = (sum + (1 << (scaleT - 1))) >> scaleT; + sum_srs = (sum_srs > MAX) ? MAX + : (sum_srs < -MIN) ? -MIN + : sum_srs; // clip + // sum_srs = (sum_srs > UMAX) ? UMAX : (sum_srs < 0) ? 0 : sum_srs; + // //clip + // ******************************************************************************************************************** + // skip convolution + for (ic3 = 0; ic3 < input_channels_skip / 8; ic3++) { + for (ic8c = 0; ic8c < 8; ic8c++) { + int val3 = skip[(ic3 * input_width * 8) + (x * 8) + ic8c]; + int k3 = kernels[(oc * (input_channels_skip / 8) * 64) + + (ic3 * 64) + (ic8c * 8) + oc8 + wts_offset]; + sum_skip_conv += val3 * k3; + } + } + sum_skip_conv_srs = + (sum_skip_conv + (1 << (skip_scaleT_conv - 1))) >> skip_scaleT_conv; + sum_skip_conv_srs = (sum_skip_conv_srs > MAX) ? MAX + : (sum_skip_conv_srs < -MIN) ? -MIN + : sum_skip_conv_srs; + // ******************************************************************************************************************** + // scale for residual + // skip_temp=skip[oc * input_width + x]; + // skip_temp=skip[(oc*input_width*8) + (x*8) + oc8] ; + skip_temp = sum_skip_conv_srs; + skip_sum = sum_srs + skip_temp; + skip_sum_srs_final = + (skip_sum + (1 << (skip_scaleT - 1))) >> skip_scaleT; + skip_sum_srs_final_out = (skip_sum_srs_final > UMAX) ? 
UMAX + : (skip_sum_srs_final < 0) + ? 0 + : skip_sum_srs_final; // clip + + // output[oc * input_width + x] = skip_sum_srs_final_out; + output[(oc * input_width * 8) + (x * 8) + oc8] = skip_sum_srs_final_out; + + // output[oc * input_width + x] = sum; + // output[oc * input_width + x] = sum+skip[oc * input_width + x]; + } + } + } + + // for (oc = 0; oc < output_channels; ++oc) { + // for (x = 0; x < input_width; ++x) { + // output[oc * input_width + x]=skip[oc * input_width + x];} + // } + + event1(); +} + +#endif // UINT8_ACT + +#else // Vector + +#ifdef INT8_ACT + +//***************************************************************************** +// conv2d 1x1 skip init - vector +// act: uint8, wts: int8, skip: int8, out: uint8 +//***************************************************************************** +void conv2dk1_skip_init_i8_vector( + uint8_t *input0, uint8_t *input1, int8_t *kernels, uint8_t *output, + int8_t *skip, const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int32_t input_channels_skip, + const int scale, const int skip_scale, const int scale_skip_conv) + +{ + event0(); + + using MMUL4x8x8 = aie::mmul<4, 8, 8, uint8, int8>; + using MMULi4x8x8 = aie::mmul<4, 8, 8, int8, int8>; + ::aie::set_saturation( + aie::saturation_mode::saturate); // Needed to saturate properly to uint8 + ::aie::set_rounding( + aie::rounding_mode::positive_inf); // Needed to saturate properly to uint8 + + uint8_t * /*restrict*/ out_ptr = output; + int8_t *i_out_ptr = (int8_t *)output; + // uint8_t * restrict skip_ptr = skip; + int8_t *restrict skip_ptr = skip; + + const int wts_offset = output_channels * input_channels; + int8_t *kernels_skip = kernels + wts_offset; + + const int scaleT = scale; + const int skip_scaleT = skip_scale; + const int scaleT_skip_conv = scale_skip_conv; + + constexpr int NUM_ACC = 8; + + const int iw_32 = (input_width / 4) / 8; + const int iw = input_width; + const int iw_32_rem = (input_width / 4) % 8; + 
+ int input_offset1 = 0; + int input_offset2 = 0; + int input_offset3 = 0; + + // aie::vector vec_tmp[NUM_ACC]; + + if (iw_32 > 0) { + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int x = 0; x < iw_32; x++) { + aie::vector vec_conv[NUM_ACC]; + aie::vector vec_skip[NUM_ACC]; + + { // conv section + MMUL4x8x8 acc_tmp[NUM_ACC]; + for (int x8 = 0; x8 < NUM_ACC; x8++) { + acc_tmp[x8] = aie::zeros(); + } + + for (int ic = 0; ic < (input_channels / 16); ic++) { // half ic/8 + // For ic = oc = 8, we can load all the weights in 1x 512b vec reg + // (2x 256b loads) For ic > 8, we would load the next 64 weights + // that are ic8..15(oc0..7) For oc > 8, we would load the next 64 + // weights after all the ic weights {OC}{IC}{IC8}{OC8} + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x8 = 0; x8 < NUM_ACC; x8++) + // chess_prepare_for_pipelining //chess_loop_range(7, ) + // e.g. 28/4 = 7 + // 13 cycles delay for vload. + // 7 gives us 3 cycle inner loop. + // 13 gave 1 cycle inner loop before partial load, not it only gets + // 2 cycles (not sure why?) + { + aie::vector in_a = + aie::load_v<32>(input0 + input_offset1); + input_offset1 += 32; // act oc0..3(ic0..7) + acc_tmp[x8].mac(in_a, in_b); + } + input_offset1 += + (iw * 8) - + 256; // Move to next ic/8 position. 256 = 32 input * 8 ic + } + for (int ic = 0; ic < (input_channels / 16); ic++) { // half ic/8 + // For ic = oc = 8, we can load all the weights in 1x 512b vec reg + // (2x 256b loads) For ic > 8, we would load the next 64 weights + // that are ic8..15(oc0..7) For oc > 8, we would load the next 64 + // weights after all the ic weights {OC}{IC}{IC8}{OC8} + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x8 = 0; x8 < NUM_ACC; x8++) + // chess_prepare_for_pipelining //chess_loop_range(7, ) + // e.g. 28/4 = 7 + // 13 cycles delay for vload. + // 7 gives us 3 cycle inner loop. 
+ // 13 gave 1 cycle inner loop before partial load, not it only gets + // 2 cycles (not sure why?) + { + aie::vector in_a = + aie::load_v<32>(input1 + input_offset2); + input_offset2 += 32; // act oc0..3(ic0..7) + acc_tmp[x8].mac(in_a, in_b); + } + input_offset2 += + (iw * 8) - + 256; // Move to next ic/8 position. 256 = 32 input * 8 ic + } + for (int x8 = 0; x8 < NUM_ACC; x8++) { + vec_conv[x8] = acc_tmp[x8].to_vector(scaleT); + } + } // conv section + + { // skip section + MMULi4x8x8 acci_tmp[NUM_ACC]; + for (int x8 = 0; x8 < NUM_ACC; x8++) { + acci_tmp[x8] = aie::zeros(); + } + + for (int ic = 0; ic < (input_channels_skip / 8); ic++) { + // For oc > 8, we would load the next 64 weights after all the ic + // weights {OC}{IC}{IC8}{OC8} + aie::vector in_b = aie::load_v<64>(kernels_skip); + kernels_skip += 64; // wts ic0..7(oc0..7) + + for (int x8 = 0; x8 < NUM_ACC; x8++) { + aie::vector in_a = + aie::load_v<32>(skip + input_offset3); + input_offset3 += 32; // act oc0..3(ic0..7) + acci_tmp[x8].mac(in_a, in_b); + } + input_offset3 += + (iw * 8) - + 256; // Move to next ic/8 position. 256 = 32 input * 8 ic + } + for (int x8 = 0; x8 < NUM_ACC; x8++) { + vec_skip[x8] = acci_tmp[x8].to_vector(scaleT_skip_conv); + } + } // skip section + + // input ptr just moves to next section + for (int x8 = 0; x8 < NUM_ACC; x8++) { + aie::accum accj; + accj.from_vector(vec_conv[x8], 0); + accj = aie::add(accj, vec_skip[x8]); + aie::vector o1 = accj.to_vector(skip_scaleT); + aie::store_v(out_ptr, o1); + out_ptr += 32; + } + input_offset1 -= + ((input_channels / 16) * iw * 8) - + 256; // reset to next input_width/32 block. 256 = 32 input * 8 ic + input_offset2 -= + ((input_channels / 16) * iw * 8) - + 256; // reset to next input_width/32 block. 256 = 32 input * 8 ic + input_offset3 -= + ((input_channels_skip / 8) * iw * 8) - + 256; // reset to next input_width/32 block. 
256 = 32 input * 8 ic + kernels -= + (input_channels / 8) * 64; // reset kernel back to beginning of ic/8 + kernels_skip -= (input_channels_skip / 8) * + 64; // reset kernel back to beginning of ic/8 + } // for(int x=0; x skip1 = aie::load_v<32>(skip_ptr); + // skip_ptr += 32; aie::vector skip1 = + // aie::load_v<32>(skip_ptr); skip_ptr += 32; + // // aie::vector tmp = aie::load_v<32>(out_ptr); + // aie::vector tmp = aie::load_v<32>(i_out_ptr); + // i_out_ptr += 32; aie::accum accj; + // accj.from_vector(skip1,0); + // accj = aie::mac(accj, tmp, (uint8_t)1); + // aie::vector o3 = + // accj.to_vector(skip_scaleT); aie::store_v(out_ptr, + // o3); out_ptr += 32; + // } + // } + // out_ptr += (iw_32_rem*32); + // skip_ptr += (iw_32_rem*32); + // } + + out_ptr -= (output_channels - 1) * iw + (iw_32_rem * 32); + skip_ptr -= (output_channels - 1) * iw + (iw_32_rem * 32); + + } // if(iw_32 > 0) { + + // **TODO** Move out_ptr and skip_ptr back to first oc/8 rem location + + // if(iw_32_rem > 0) { + + // const int ocs = output_channels; + // const int ics = input_channels; + + // input_offset1 = 0; // TODO need to offset this to ic_32_rem position + // input_offset2 = 0; // TODO need to offset this to ic_32_rem position + + // for(int oc=0; oc<(ocs/8); oc++) { + // for(int ic=0; ic<(ics/16); ic++) { + // // For ic = oc = 8, we can load all the weights in 1x 512b vec reg + // (2x 256b loads) + // // For ic > 8, we would load the next 64 weights that are + // ic8..15(oc0..7) + // // For oc > 8, we would load the next 64 weights after all the ic + // weights {OC}{IC}{IC8}{OC8} aie::vector in_b = + // aie::load_v<64>(kernels); kernels+=64; // wts ic0..7(oc0..7) + + // for(int x=0; x in_a = + // aie::load_v<32>(input0+input_offset1); input_offset1 += 32; // + // act oc0..3(ic0..7) acc_tmp[x].mac(in_a, in_b); + // } + // input_offset1 += (iw*8)-(iw_32_rem*32); // Move to next ic/8 + // position, TODO -(iw_32_rem*8)?? 
+ // } + // for(int ic=0; ic<(ics/16); ic++) { + // // For ic = oc = 8, we can load all the weights in 1x 512b vec reg + // (2x 256b loads) + // // For ic > 8, we would load the next 64 weights that are + // ic8..15(oc0..7) + // // For oc > 8, we would load the next 64 weights after all the ic + // weights {OC}{IC}{IC8}{OC8} aie::vector in_b = + // aie::load_v<64>(kernels); kernels+=64; // wts ic0..7(oc0..7) + + // for(int x=0; x in_a = + // aie::load_v<32>(input1+input_offset2); input_offset2 += 32; // + // act oc0..3(ic0..7) acc_tmp[x].mac(in_a, in_b); + // } + // input_offset2 += (iw*8)-(iw_32_rem*32); // Move to next ic/8 + // position + // } + // // input ptr just moves to next section + // for(int xx=0; xx o1 = acc_tmp[xx].to_vector(scaleT); + // aie::vector o1 = acc_tmp[xx].to_vector(scaleT); + // // aie::store_v(out_ptr, o1); out_ptr += 32; + // aie::store_v(i_out_ptr, o1); i_out_ptr += 32; + // acc_tmp[xx] = aie::zeros(); + // } + // // input -= ((ics-1)/8)*(iw*8)+(iw_32_rem*32); // reset to beginning + // of input ptr for remainder input_offset1 -= 448; // reset to + // beginning of input ptr for remainder input_offset2 -= 448; // reset + // to beginning of input ptr for remainder + // // kernel ptr already at next oc/8 + // i_out_ptr += (iw*8)-(iw_32_rem*32); // move to next oc/8 + // (skip remainder section if present) + // } + + // i_out_ptr -= output_channels*iw; + + // for(int oc=0; oc<(output_channels/8); oc++) { + // for(int x8=0; x8 skip1 = aie::load_v<32>(skip_ptr); skip_ptr += + // 32; aie::vector tmp = aie::load_v<32>(i_out_ptr); + // aie::accum accj; + // accj.from_vector(skip1,0); + // accj = aie::mac(accj, tmp, (uint8_t)1); + // aie::vector o3 = accj.to_vector(skip_scaleT); + // aie::store_v(out_ptr, o3); out_ptr += 32; + // } + // out_ptr += (iw*8)-(iw_32_rem*32); + // skip_ptr += (iw*8)-(iw_32_rem*32); + // } + + // } // if(iw_32_rem > 0) + + event1(); +} + +#else // UINT8_ACT + 
+//***************************************************************************** +// conv2d 1x1 skip init - vector +// act: uint8, wts: int8, skip: uint8, out: uint8 +//***************************************************************************** +void conv2dk1_skip_init_ui8_vector( + uint8_t *input0, uint8_t *input1, int8_t *kernels, uint8_t *output, + uint8_t *skip, const int32_t input_width, const int32_t input_channels, + const int32_t output_channels, const int32_t input_channels_skip, + const int scale, const int skip_scale, const int scale_skip_conv) + +{ + event0(); + + using MMUL4x8x8 = aie::mmul<4, 8, 8, uint8, int8>; + // using MMULi4x8x8 = aie::mmul<4, 8, 8, int8, int8>; + ::aie::set_saturation( + aie::saturation_mode::saturate); // Needed to saturate properly to uint8 + ::aie::set_rounding( + aie::rounding_mode::positive_inf); // Needed to saturate properly to uint8 + + uint8_t * /*restrict*/ out_ptr = output; + int8_t *i_out_ptr = (int8_t *)output; + // uint8_t * restrict skip_ptr = skip; + uint8_t *restrict skip_ptr = skip; + + const int wts_offset = output_channels * input_channels; + int8_t *kernels_skip = kernels + wts_offset; + + const int scaleT = scale; + const int skip_scaleT = skip_scale; + const int scaleT_skip_conv = scale_skip_conv; + + constexpr int NUM_ACC = 8; + + const int iw_32 = (input_width / 4) / 8; + const int iw = input_width; + const int iw_32_rem = (input_width / 4) % 8; + + int input_offset1 = 0; + int input_offset2 = 0; + int input_offset3 = 0; + + // aie::vector vec_tmp[NUM_ACC]; + + if (iw_32 > 0) { + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int x = 0; x < iw_32; x++) { + aie::vector vec_conv[NUM_ACC]; + aie::vector vec_skip[NUM_ACC]; + + MMUL4x8x8 acc_tmp[NUM_ACC]; + { // conv section + // MMUL4x8x8 acc_tmp[NUM_ACC]; + for (int x8 = 0; x8 < NUM_ACC; x8++) { + acc_tmp[x8] = aie::zeros(); + } + + for (int ic = 0; ic < (input_channels / 16); ic++) { // half ic/8 + // For ic = oc = 8, we can load all the 
weights in 1x 512b vec reg + // (2x 256b loads) For ic > 8, we would load the next 64 weights + // that are ic8..15(oc0..7) For oc > 8, we would load the next 64 + // weights after all the ic weights {OC}{IC}{IC8}{OC8} + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x8 = 0; x8 < NUM_ACC; x8++) + // chess_prepare_for_pipelining //chess_loop_range(7, ) + // e.g. 28/4 = 7 + // 13 cycles delay for vload. + // 7 gives us 3 cycle inner loop. + // 13 gave 1 cycle inner loop before partial load, not it only gets + // 2 cycles (not sure why?) + { + aie::vector in_a = + aie::load_v<32>(input0 + input_offset1); + input_offset1 += 32; // act oc0..3(ic0..7) + acc_tmp[x8].mac(in_a, in_b); + } + input_offset1 += + (iw * 8) - + 256; // Move to next ic/8 position. 256 = 32 input * 8 ic + } + for (int ic = 0; ic < (input_channels / 16); ic++) { // half ic/8 + // For ic = oc = 8, we can load all the weights in 1x 512b vec reg + // (2x 256b loads) For ic > 8, we would load the next 64 weights + // that are ic8..15(oc0..7) For oc > 8, we would load the next 64 + // weights after all the ic weights {OC}{IC}{IC8}{OC8} + aie::vector in_b = aie::load_v<64>(kernels); + kernels += 64; // wts ic0..7(oc0..7) + + for (int x8 = 0; x8 < NUM_ACC; x8++) + // chess_prepare_for_pipelining //chess_loop_range(7, ) + // e.g. 28/4 = 7 + // 13 cycles delay for vload. + // 7 gives us 3 cycle inner loop. + // 13 gave 1 cycle inner loop before partial load, not it only gets + // 2 cycles (not sure why?) + { + aie::vector in_a = + aie::load_v<32>(input1 + input_offset2); + input_offset2 += 32; // act oc0..3(ic0..7) + acc_tmp[x8].mac(in_a, in_b); + } + input_offset2 += + (iw * 8) - + 256; // Move to next ic/8 position. 
256 = 32 input * 8 ic + } + for (int x8 = 0; x8 < NUM_ACC; x8++) { + vec_conv[x8] = acc_tmp[x8].to_vector(scaleT); + } + } // conv section + + { // skip section + // MMULi4x8x8 acci_tmp[NUM_ACC]; + for (int x8 = 0; x8 < NUM_ACC; x8++) { + acc_tmp[x8] = aie::zeros(); + } + + for (int ic = 0; ic < (input_channels_skip / 8); ic++) { + // For oc > 8, we would load the next 64 weights after all the ic + // weights {OC}{IC}{IC8}{OC8} + aie::vector in_b = aie::load_v<64>(kernels_skip); + kernels_skip += 64; // wts ic0..7(oc0..7) + + for (int x8 = 0; x8 < NUM_ACC; x8++) { + aie::vector in_a = + aie::load_v<32>(skip + input_offset3); + input_offset3 += 32; // act oc0..3(ic0..7) + acc_tmp[x8].mac(in_a, in_b); + } + input_offset3 += + (iw * 8) - + 256; // Move to next ic/8 position. 256 = 32 input * 8 ic + } + for (int x8 = 0; x8 < NUM_ACC; x8++) { + vec_skip[x8] = acc_tmp[x8].to_vector(scaleT_skip_conv); + } + } // skip section + + // input ptr just moves to next section + for (int x8 = 0; x8 < NUM_ACC; x8++) { + aie::accum accj; + accj.from_vector(vec_conv[x8], 0); + accj = aie::add(accj, vec_skip[x8]); + aie::vector o1 = accj.to_vector(skip_scaleT); + aie::store_v(out_ptr, o1); + out_ptr += 32; + } + input_offset1 -= + ((input_channels / 16) * iw * 8) - + 256; // reset to next input_width/32 block. 256 = 32 input * 8 ic + input_offset2 -= + ((input_channels / 16) * iw * 8) - + 256; // reset to next input_width/32 block. 256 = 32 input * 8 ic + input_offset3 -= + ((input_channels_skip / 8) * iw * 8) - + 256; // reset to next input_width/32 block. 
256 = 32 input * 8 ic + kernels -= + (input_channels / 8) * 64; // reset kernel back to beginning of ic/8 + kernels_skip -= (input_channels_skip / 8) * + 64; // reset kernel back to beginning of ic/8 + } // for(int x=0; x skip1 = aie::load_v<32>(skip_ptr); + // skip_ptr += 32; aie::vector skip1 = + // aie::load_v<32>(skip_ptr); skip_ptr += 32; + // // aie::vector tmp = aie::load_v<32>(out_ptr); + // aie::vector tmp = aie::load_v<32>(i_out_ptr); + // i_out_ptr += 32; aie::accum accj; + // accj.from_vector(skip1,0); + // accj = aie::mac(accj, tmp, (uint8_t)1); + // aie::vector o3 = + // accj.to_vector(skip_scaleT); aie::store_v(out_ptr, + // o3); out_ptr += 32; + // } + // } + // out_ptr += (iw_32_rem*32); + // skip_ptr += (iw_32_rem*32); + // } + + out_ptr -= (output_channels - 1) * iw + (iw_32_rem * 32); + skip_ptr -= (output_channels - 1) * iw + (iw_32_rem * 32); + + } // if(iw_32 > 0) { + + // **TODO** Move out_ptr and skip_ptr back to first oc/8 rem location + + // if(iw_32_rem > 0) { + + // const int ocs = output_channels; + // const int ics = input_channels; + + // input_offset1 = 0; // TODO need to offset this to ic_32_rem position + // input_offset2 = 0; // TODO need to offset this to ic_32_rem position + + // for(int oc=0; oc<(ocs/8); oc++) { + // for(int ic=0; ic<(ics/16); ic++) { + // // For ic = oc = 8, we can load all the weights in 1x 512b vec reg + // (2x 256b loads) + // // For ic > 8, we would load the next 64 weights that are + // ic8..15(oc0..7) + // // For oc > 8, we would load the next 64 weights after all the ic + // weights {OC}{IC}{IC8}{OC8} aie::vector in_b = + // aie::load_v<64>(kernels); kernels+=64; // wts ic0..7(oc0..7) + + // for(int x=0; x in_a = + // aie::load_v<32>(input0+input_offset1); input_offset1 += 32; // + // act oc0..3(ic0..7) acc_tmp[x].mac(in_a, in_b); + // } + // input_offset1 += (iw*8)-(iw_32_rem*32); // Move to next ic/8 + // position, TODO -(iw_32_rem*8)?? 
+ // } + // for(int ic=0; ic<(ics/16); ic++) { + // // For ic = oc = 8, we can load all the weights in 1x 512b vec reg + // (2x 256b loads) + // // For ic > 8, we would load the next 64 weights that are + // ic8..15(oc0..7) + // // For oc > 8, we would load the next 64 weights after all the ic + // weights {OC}{IC}{IC8}{OC8} aie::vector in_b = + // aie::load_v<64>(kernels); kernels+=64; // wts ic0..7(oc0..7) + + // for(int x=0; x in_a = + // aie::load_v<32>(input1+input_offset2); input_offset2 += 32; // + // act oc0..3(ic0..7) acc_tmp[x].mac(in_a, in_b); + // } + // input_offset2 += (iw*8)-(iw_32_rem*32); // Move to next ic/8 + // position + // } + // // input ptr just moves to next section + // for(int xx=0; xx o1 = acc_tmp[xx].to_vector(scaleT); + // aie::vector o1 = acc_tmp[xx].to_vector(scaleT); + // // aie::store_v(out_ptr, o1); out_ptr += 32; + // aie::store_v(i_out_ptr, o1); i_out_ptr += 32; + // acc_tmp[xx] = aie::zeros(); + // } + // // input -= ((ics-1)/8)*(iw*8)+(iw_32_rem*32); // reset to beginning + // of input ptr for remainder input_offset1 -= 448; // reset to + // beginning of input ptr for remainder input_offset2 -= 448; // reset + // to beginning of input ptr for remainder + // // kernel ptr already at next oc/8 + // i_out_ptr += (iw*8)-(iw_32_rem*32); // move to next oc/8 + // (skip remainder section if present) + // } + + // i_out_ptr -= output_channels*iw; + + // for(int oc=0; oc<(output_channels/8); oc++) { + // for(int x8=0; x8 skip1 = aie::load_v<32>(skip_ptr); skip_ptr += + // 32; aie::vector tmp = aie::load_v<32>(i_out_ptr); + // aie::accum accj; + // accj.from_vector(skip1,0); + // accj = aie::mac(accj, tmp, (uint8_t)1); + // aie::vector o3 = accj.to_vector(skip_scaleT); + // aie::store_v(out_ptr, o3); out_ptr += 32; + // } + // out_ptr += (iw*8)-(iw_32_rem*32); + // skip_ptr += (iw*8)-(iw_32_rem*32); + // } + + // } // if(iw_32_rem > 0) + + event1(); +} + +#endif // UINT8_ACT + +#endif // Vector + 
+//***************************************************************************** +// conv2d 1x1 skip init wrappers +//***************************************************************************** +extern "C" { + +#ifdef SCALAR + +#ifdef INT8_ACT + +void conv2dk1_skip_init_i8(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, int8_t *skip, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, + const int32_t input_channels_skip, const int scale, + const int skip_scale, const int scale_skip_conv) { + conv2dk1_skip_init_i8_scalar( + input0, input1, kernels, output, skip, input_width, input_channels, + output_channels, input_channels_skip, scale, skip_scale, scale_skip_conv); +} + +#else // UINT8_ACT + +void conv2dk1_skip_init_ui8(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, uint8_t *skip, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, + const int32_t input_channels_skip, const int scale, + const int skip_scale, const int scale_skip_conv) { + // conv2dk1_skip_init_ui8_scalar(input0, input1, kernels, output, skip, + // input_width, input_channels, output_channels, input_channels_skip, scale, + // skip_scale, scale_skip_conv); +} + +#endif // UINT8_ACT + +#else // Vector + +#ifdef INT8_ACT + +void conv2dk1_skip_init_i8(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, int8_t *skip, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, + const int32_t input_channels_skip, const int scale, + const int skip_scale, const int scale_skip_conv) { + conv2dk1_skip_init_i8_vector( + input0, input1, kernels, output, skip, input_width, input_channels, + output_channels, input_channels_skip, scale, skip_scale, scale_skip_conv); +} + +#else // UINT8_ACT + +void conv2dk1_skip_init_ui8(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, uint8_t *skip, + const int32_t input_width, + 
const int32_t input_channels, + const int32_t output_channels, + const int32_t input_channels_skip, const int scale, + const int skip_scale, const int scale_skip_conv) { + // conv2dk1_skip_init_ui8_vector(input0, input1, kernels, output, skip, + // input_width, input_channels, output_channels, input_channels_skip, scale, + // skip_scale, scale_skip_conv); +} + +#endif // UINT8_ACT + +#endif // Vector + +} // extern "C" \ No newline at end of file diff --git a/aie_kernels/aie2/conv2dk1_skip_init.h b/aie_kernels/aie2/conv2dk1_skip_init.h new file mode 100755 index 0000000000..cfb4b8b467 --- /dev/null +++ b/aie_kernels/aie2/conv2dk1_skip_init.h @@ -0,0 +1,33 @@ +//===- conv2dk1_skip_init.h -------------------------------------------------*- +// C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _CONV2DK1_SKIP_INIT_H +#define _CONV2DK1_SKIP_INIT_H + +extern "C" { + +void conv2dk1_skip_init_i8(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, int8_t *skip, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, const int scale, + const int skip_scale); + +void conv2dk1_skip_init_ui8(uint8_t *input0, uint8_t *input1, int8_t *kernels, + uint8_t *output, uint8_t *skip, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, const int scale, + const int skip_scale); + +} // extern "C" + +#endif diff --git a/aie_kernels/aie2/conv2dk3.cc b/aie_kernels/aie2/conv2dk3.cc new file mode 100755 index 0000000000..e0f3d9e1b5 --- /dev/null +++ b/aie_kernels/aie2/conv2dk3.cc @@ -0,0 +1,1434 @@ +//===- conv2dk3.cc -------------------------------------------------*- C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +// #define __AIENGINE__ 1 +#define __AIENGINE__ 2 +#define NOCPP +#define __AIEARCH__ 20 + +#include +#include +#include + +#include + +#define REL_WRITE 0 +#define REL_READ 1 + +enum region { top, middle, bottom }; + +#ifdef SCALAR + +const int32_t MAX = 255; + +#ifdef INT8_ACT + +//***************************************************************************** +// conv2d 3x3 - scalar +// act: int8, wts: int8, out: uint8 +//***************************************************************************** +void conv2dk3_i8_scalar(int8_t *line0, int8_t *line1, int8_t *line2, + int8_t *wts, uint8_t *output, const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, + const int32_t kernel_width, const int32_t kernel_height, + const int32_t check, const int scale, + const int channel_offset) { + event0(); + + int x, ki, ic, oc, ic8, oc8; + int32_t sum; + int sum_srs; + int wts_indx_0 = 0, wts_indx_1 = 0, wts_indx_2 = 0; + int in_indx_0 = 0; + // for (oc = (0+channel_offset)/8; oc < (output_channels+channel_offset)/8; + // oc++) { + for (oc = 0; oc < output_channels / 8; oc++) { + int oc_ofst = oc + (channel_offset / 8); + for (oc8 = 0; oc8 < 8; oc8++) { + + // left border + sum = 0; + sum_srs = 0; + for (ic = 0; ic < input_channels / 8; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + for (ki = 1; ki < kernel_width; ki++) { + + // replicate 1 border pixel on the left + // wts_indx_0=0*3 + ki + 3*kernel_width*ic + + // 3*kernel_width*input_channels*oc; wts_indx_1=1*3 + ki + + // 3*kernel_width*ic + 3*kernel_width*input_channels*oc; + // wts_indx_2=2*3 + ki + 3*kernel_width*ic + + // 3*kernel_width*input_channels*oc; + int wts_indx_0 = + (0 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + oc8; + int wts_indx_1 = + (1 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) 
+ + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + oc8; + int wts_indx_2 = + (2 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + oc8; + + if (ki == 0) { + // in_indx_0=0+ki+input_width*ic; + in_indx_0 = (0 + ki) * 8 + ((ic * input_width * 8) + ic8); + } else { + // in_indx_0=0+ki-1+input_width*ic; + in_indx_0 = (0 + ki - 1) * 8 + ((ic * input_width * 8) + ic8); + } + + if (check != top) + sum += line0[in_indx_0] * wts[wts_indx_0]; + sum += line1[in_indx_0] * wts[wts_indx_1]; + if (check != bottom) + sum += line2[in_indx_0] * wts[wts_indx_2]; + } + } + } + // output[oc * (input_width) + 0] = sum; + sum_srs = (sum + (1 << (scale - 1))) >> scale; + sum_srs = (sum_srs > MAX) ? MAX : (sum_srs < 0) ? 0 : sum_srs; + // output[oc * input_width + 0] = sum_srs; + output[(oc * input_width * 8) + oc8] = sum_srs; + + // right border + sum = 0; + sum_srs = 0; + for (ic = 0; ic < input_channels / 8; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + for (ki = 0; ki < kernel_width - 1; ki++) { + // wts_indx_0=0*3 + ki + 3*kernel_width*ic + + // 3*kernel_width*input_channels*oc; wts_indx_1=1*3 + ki + + // 3*kernel_width*ic + 3*kernel_width*input_channels*oc; + // wts_indx_2=2*3 + ki + 3*kernel_width*ic + + // 3*kernel_width*input_channels*oc; + int wts_indx_0 = + (0 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + oc8; + int wts_indx_1 = + (1 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + oc8; + int wts_indx_2 = + (2 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + oc8; + + if (ki != 2) { + // in_indx_0=input_width-2+ki+input_width*ic; + in_indx_0 = + (input_width - 2 + ki) * 8 + ((ic * input_width * 8) + ic8); + } else { // replicate 1 border pixel on the 
right + // in_indx_0=input_width-2+ki-1+input_width*ic; + in_indx_0 = (input_width - 2 + ki - 1) * 8 + + ((ic * input_width * 8) + ic8); + } + if (check != top) + sum += line0[in_indx_0] * wts[wts_indx_0]; + sum += line1[in_indx_0] * wts[wts_indx_1]; + if (check != bottom) + sum += line2[in_indx_0] * wts[wts_indx_2]; + } + } + } + sum_srs = (sum + (1 << (scale - 1))) >> scale; + sum_srs = (sum_srs > MAX) ? MAX : (sum_srs < 0) ? 0 : sum_srs; + // output[oc * input_width + input_width-1] = sum_srs; + output[(oc * input_width * 8) + (input_width - 1) * 8 + oc8] = sum_srs; + // output[oc * (input_width) + input_width-1] = sum; + + for (x = 1; x < input_width - 1; x++) { // col of output image + sum = 0; + sum_srs = 0; + for (ic = 0; ic < input_channels / 8; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + for (ki = 0; ki < kernel_width; ki++) { + // wts format - orig is oc,ic,ky,kx, reformat is + // oc,ic,k0..k8,ic8,oc8 + + // int wts_indx_0=0*3 + ki + 3*kernel_width*ic + + // 3*kernel_width*input_channels*oc; int wts_indx_1=1*3 + ki + + // 3*kernel_width*ic + 3*kernel_width*input_channels*oc; int + // wts_indx_2=2*3 + ki + 3*kernel_width*ic + + // 3*kernel_width*input_channels*oc; + int wts_indx_0 = + (0 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + + oc8; + int wts_indx_1 = + (1 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + + oc8; + int wts_indx_2 = + (2 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + + oc8; + + // int in_indx_0=x-1+ki+input_width*ic; + int in_indx_0 = (x - 1 + ki) * 8 + ((ic * input_width * 8) + ic8); + + if (check != top) + sum += line0[in_indx_0] * wts[wts_indx_0]; + sum += line1[in_indx_0] * wts[wts_indx_1]; + if (check != bottom) + sum += line2[in_indx_0] * wts[wts_indx_2]; + } + } + } + sum_srs = (sum + 
(1 << (scale - 1))) >> scale; + sum_srs = (sum_srs > MAX) ? MAX : (sum_srs < 0) ? 0 : sum_srs; + output[(oc * input_width * 8) + x * 8 + oc8] = sum_srs; + // output[oc * (input_width) + x] = sum; + } + } + } + + event1(); +} + +#else // UINT8_ACT + +//***************************************************************************** +// conv2d 3x3 - scalar +// act: uint8, wts: int8, out: uint8 +//***************************************************************************** +void conv2dk3_ui8_scalar(uint8_t *line0, uint8_t *line1, uint8_t *line2, + int8_t *wts, uint8_t *output, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, + const int32_t kernel_width, + const int32_t kernel_height, const int32_t check, + const int scale, const int channel_offset) { + event0(); + + int x, ki, ic, oc, ic8, oc8; + int32_t sum; + int sum_srs; + int wts_indx_0 = 0, wts_indx_1 = 0, wts_indx_2 = 0; + int in_indx_0 = 0; + // for (oc = (0+channel_offset)/8; oc < (output_channels+channel_offset)/8; + // oc++) { + for (oc = 0; oc < output_channels / 8; oc++) { + int oc_ofst = oc + (channel_offset / 8); + for (oc8 = 0; oc8 < 8; oc8++) { + + // left border + sum = 0; + sum_srs = 0; + for (ic = 0; ic < input_channels / 8; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + for (ki = 1; ki < kernel_width; ki++) { + + // replicate 1 border pixel on the left + // wts_indx_0=0*3 + ki + 3*kernel_width*ic + + // 3*kernel_width*input_channels*oc; wts_indx_1=1*3 + ki + + // 3*kernel_width*ic + 3*kernel_width*input_channels*oc; + // wts_indx_2=2*3 + ki + 3*kernel_width*ic + + // 3*kernel_width*input_channels*oc; + int wts_indx_0 = + (0 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + oc8; + int wts_indx_1 = + (1 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + oc8; + int wts_indx_2 = + (2 * 3 * 64) + (ki * 64) 
+ (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + oc8; + + if (ki == 0) { + // in_indx_0=0+ki+input_width*ic; + in_indx_0 = (0 + ki) * 8 + ((ic * input_width * 8) + ic8); + } else { + // in_indx_0=0+ki-1+input_width*ic; + in_indx_0 = (0 + ki - 1) * 8 + ((ic * input_width * 8) + ic8); + } + + if (check != top) + sum += line0[in_indx_0] * wts[wts_indx_0]; + sum += line1[in_indx_0] * wts[wts_indx_1]; + if (check != bottom) + sum += line2[in_indx_0] * wts[wts_indx_2]; + } + } + } + // output[oc * (input_width) + 0] = sum; + sum_srs = (sum + (1 << (scale - 1))) >> scale; + sum_srs = (sum_srs > MAX) ? MAX : (sum_srs < 0) ? 0 : sum_srs; + // output[oc * input_width + 0] = sum_srs; + output[(oc * input_width * 8) + oc8] = sum_srs; + + // right border + sum = 0; + sum_srs = 0; + for (ic = 0; ic < input_channels / 8; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + for (ki = 0; ki < kernel_width - 1; ki++) { + // wts_indx_0=0*3 + ki + 3*kernel_width*ic + + // 3*kernel_width*input_channels*oc; wts_indx_1=1*3 + ki + + // 3*kernel_width*ic + 3*kernel_width*input_channels*oc; + // wts_indx_2=2*3 + ki + 3*kernel_width*ic + + // 3*kernel_width*input_channels*oc; + int wts_indx_0 = + (0 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + oc8; + int wts_indx_1 = + (1 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + oc8; + int wts_indx_2 = + (2 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + oc8; + + if (ki != 2) { + // in_indx_0=input_width-2+ki+input_width*ic; + in_indx_0 = + (input_width - 2 + ki) * 8 + ((ic * input_width * 8) + ic8); + } else { // replicate 1 border pixel on the right + // in_indx_0=input_width-2+ki-1+input_width*ic; + in_indx_0 = (input_width - 2 + ki - 1) * 8 + + ((ic * 
input_width * 8) + ic8); + } + if (check != top) + sum += line0[in_indx_0] * wts[wts_indx_0]; + sum += line1[in_indx_0] * wts[wts_indx_1]; + if (check != bottom) + sum += line2[in_indx_0] * wts[wts_indx_2]; + } + } + } + sum_srs = (sum + (1 << (scale - 1))) >> scale; + sum_srs = (sum_srs > MAX) ? MAX : (sum_srs < 0) ? 0 : sum_srs; + // output[oc * input_width + input_width-1] = sum_srs; + output[(oc * input_width * 8) + (input_width - 1) * 8 + oc8] = sum_srs; + // output[oc * (input_width) + input_width-1] = sum; + + for (x = 1; x < input_width - 1; x++) { // col of output image + sum = 0; + sum_srs = 0; + for (ic = 0; ic < input_channels / 8; ic++) { + for (ic8 = 0; ic8 < 8; ic8++) { + for (ki = 0; ki < kernel_width; ki++) { + // wts format - orig is oc,ic,ky,kx, reformat is + // oc,ic,k0..k8,ic8,oc8 + + // int wts_indx_0=0*3 + ki + 3*kernel_width*ic + + // 3*kernel_width*input_channels*oc; int wts_indx_1=1*3 + ki + + // 3*kernel_width*ic + 3*kernel_width*input_channels*oc; int + // wts_indx_2=2*3 + ki + 3*kernel_width*ic + + // 3*kernel_width*input_channels*oc; + int wts_indx_0 = + (0 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + + oc8; + int wts_indx_1 = + (1 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + + oc8; + int wts_indx_2 = + (2 * 3 * 64) + (ki * 64) + (ic * 3 * kernel_width * 64) + + (ic8 * 8) + + (oc_ofst * (input_channels / 8) * 3 * kernel_width * 64) + + oc8; + + // int in_indx_0=x-1+ki+input_width*ic; + int in_indx_0 = (x - 1 + ki) * 8 + ((ic * input_width * 8) + ic8); + + if (check != top) + sum += line0[in_indx_0] * wts[wts_indx_0]; + sum += line1[in_indx_0] * wts[wts_indx_1]; + if (check != bottom) + sum += line2[in_indx_0] * wts[wts_indx_2]; + } + } + } + sum_srs = (sum + (1 << (scale - 1))) >> scale; + sum_srs = (sum_srs > MAX) ? MAX : (sum_srs < 0) ? 
0 : sum_srs; + output[(oc * input_width * 8) + x * 8 + oc8] = sum_srs; + // output[oc * (input_width) + x] = sum; + } + } + } + + event1(); +} + +#endif // UINT8_ACT + +#else // Vector + +#ifdef INT8_ACT + +//***************************************************************************** +// conv2d 3x3 - vector +// act: int8, wts: int8, out: uint8 +//***************************************************************************** +void conv2dk3_i8_vector(int8_t *line0, int8_t *line1, int8_t *line2, + int8_t *wts, uint8_t *output, const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, + const int32_t kernel_width, const int32_t kernel_height, + const int32_t check, const int scale, + const int channel_offset) { + event0(); + + // Compute + using MMUL4x8x8 = aie::mmul<4, 8, 8, int8, int8>; + ::aie::set_saturation( + aie::saturation_mode::saturate); // Needed to saturate properly to uint8 + ::aie::set_rounding( + aie::rounding_mode::positive_inf); // Needed to saturate properly to uint8 + + constexpr unsigned VecFactor = 16; + + // const int scale = 11; + + // basic MMUL intrinisic needed is k x ic x oc + // k is number of inputs processed at a time + // So if ic=8, oc=4, then k=8 and we use 8x8x4 + const unsigned k = + 256 / (input_channels * output_channels); // 8 inputs per vector output + + aie::vector zero32 = aie::zeros(); + + // aie::vector prev_a[3], + // aie::vector in_a; + // aie::vector in_b; + // aie::vector tmp_a; + // aie::vector tmp_a1, tmp_a2; + + // int8_t * restrict line[3]; + int8_t *line[3]; + line[0] = line0; + line[1] = line1; + line[2] = line2; + + // int8_t * restrict wtsLine[3]; + int8_t *wtsLine[3]; + // oc,ic,ky,kx,ic8,oc8 + wtsLine[0] = wts + (channel_offset / 8) * (input_channels / 8) * + kernel_height * kernel_width * 64; + wtsLine[1] = wts + + (channel_offset / 8) * (input_channels / 8) * kernel_height * + kernel_width * 64 + + kernel_width * 64; // next kernel line is always 8*8 away + wtsLine[2] = wts + 
+ (channel_offset / 8) * (input_channels / 8) * kernel_height * + kernel_width * 64 + + 2 * kernel_width * 64; // next kernel line is always 8*8 away + + MMUL4x8x8 acc_tmp[8]; + + // Zero accumulators used for storing partial results + // for(int x=0; x(); + } + + // TODO temporary workaround. When assigned to input_width, the results are + // wrong. ??? + const int iw = 32; + // const int32_t iw = input_width; + + // const int iw_32 = ((input_width/4)-2)/8; + // const int iw_32 = ((iw/4)-2)/8; + // const int iw_32 = ((32/4)-2)/8; + const int iw_32 = 0; + + // const int iw_32_rem = ((input_width/4)-2) % 8; + // const int iw_32_rem = ((iw/4)-2) % 8; + // const int iw_32_rem = ((32/4)-2) % 8; + const int iw_32_rem = 6; + + // output += (channel_offset*iw); // channel_offset/8*iw*8 + + int kernel_height_start; + int kernel_height_end; + + // int kernel_height_start, kernel_height_end; +#ifdef BORDER_REPLICATE + kernel_height_start = 0; + kernel_height_end = kernel_height; + // constexpr int kernel_height_start = 0; + // constexpr int kernel_height_end = kernel_height; +#else // Zero border for 3x3 + // constexpr int kernel_height_start = 0; + // constexpr int kernel_height_end = kernel_height-1; + + // if(check == top) + // idx_adj = 1; + + // We skip top or bottom row for zero border + switch (check) { + case top: + kernel_height_start = 1; + kernel_height_end = kernel_height; + break; + case middle: + kernel_height_start = 0; + kernel_height_end = kernel_height; + break; + case bottom: + kernel_height_start = 0; + kernel_height_end = kernel_height - 1; + break; + } +#endif + + // -------------------------------------------------------------------- + // Leftmost pattern + // -------------------------------------------------------------------- + // Computes leftmost 4 inputs for all input/output channels. + // This shifts the leftmost input data by 1 (x8 channels) for 3x3 to + // account for border. 
Border replicate copies the leftmost input while + // 0 border shifts in 0's. If we need to support larger than 3x3, the + // replicate logic would need to be changed. + // -------------------------------------------------------------------- + { + // in_b = aie::load_v<64>(wtsLine[kernel_height_start]); + // wtsLine[kernel_height_start] +=64; // wts ic0..7(oc0..7) + + MMUL4x8x8 acc1 = aie::zeros(); + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int ic = 0; ic < (input_channels / 8); ic++) { + for (int i = kernel_height_start; i < kernel_height_end; i++) + chess_prepare_for_pipelining chess_loop_range(2, ) + // chess_unroll_loop() + { + // aie::vector tmp_a1, tmp_a2; + // Load input data [a0 a1 a2 a3 a4 a5 a6 a7] where each position has + // data for 8 channels + auto tmp_a1 = aie::load_v<32>(line[i]); + line[i] += 32; // act 0..3 (ic0..7 for each) + auto tmp_a2 = + aie::load_v<32>(line[i]); // act 4..7 (ic0..7 for each) + auto in_a = aie::concat(tmp_a1, tmp_a2); + +#ifdef BORDER_REPLICATE + tmp_a1 = aie::shuffle_up(tmp_a1, 24); + tmp_a.insert<32>(1, tmp_a1); +#else + tmp_a = aie::zeros(); +#endif + // Shift right 1 input (8 channels) [- a0 a1 a2 a3 a4 a5 a6] where - + // is either a0 or 0's + in_a = aie::shuffle_up_fill(in_a, tmp_a, 8); + + // Previous buffer stores shifted data, [- - - - a0 a1 a2 a3] + // where - is + // prev_a[i] = aie::shuffle_up(in_a, 24); // Shift right (4-1)*8 + + // prev_a[i] = in_a; + // prev_a[i] = aie::shuffle_up(prev_a[i], 24); // Shift right + // (4-1)*8 + + // For kernel width, we load 64 weights (8 ics x 8 ocs) and multiply + // it with the act buffer. acc[32] += in_a[32] * wts[64] We then + // shift the buffer left by 1 data position (8 channels). 
+ for (int j = 0; j < kernel_width; j++) + // chess_unroll_loop() + { + auto in_b = aie::load_v<64>(wtsLine[i]); + wtsLine[i] += 64; // wts ic0..7(oc0..7) + acc1.mac(in_a.extract<32>(0), in_b); + // Shift input A by 1 row (1x8) which is by 1 (the 8 is the ic=8) + in_a = aie::shuffle_down(in_a, 8); + } + wtsLine[i] -= + (kernel_width * 64); // Reset weight pointer for this line + // wtsLine[i] += ((kernel_height-1)*kernel_width*64); // Move to + // next ic/8 position No need to load next set of weights because + // next row of weights immediately follows line[i] += (iw*4)*8; // + // Increment to next ic/8 position (reset at end of outermost loop) + } // for(int i=kernel_height_start; i o1 = acc1.to_vector(scale); + aie::store_v(output, o1); + output += iw * 8; // Shift to next oc/8 offset for left side + + acc1 = aie::zeros(); + + // Shift back to beginning of input + for (int i = kernel_height_start; i < kernel_height_end; i++) { + line[i] -= (input_channels / 8) * (iw * 8); + } + + } // for(int oc=0; oc<(output_channels/8); oc++) { + + // Reset output to beginning, then add 4*8 + // Reset wts to beginning of wts + // Reset line to beginning of input, then add 4*8 + output -= (output_channels / 8) * (iw * 8) - 32; + for (int i = kernel_height_start; i < kernel_height_end; i++) { + wtsLine[i] -= (output_channels / 8) * (input_channels / 8) * + kernel_width * kernel_height * + 64; // kernel_width*kernel_height*8*8 + // line[i] -= (output_channels/8)*(input_channels/8)*(iw*8)-32; // + line[i] += 32; + } + } + + // -------------------------------------------------------------------- + // Middle pattern + // -------------------------------------------------------------------- + // The middle seciton algorithm is different because we want to minimize + // the reloading of weights and activations. So instead, we use up to 8 + // accumulators to store partial products with activations being shifted. + // Then for the next kernel position, we reload weights. 
+ // + // H,W,C8 + // -------------------------------------------------------------------- + + // Main loop for when input_width/4-2 > 8 + if (iw_32 > 0) { + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int iw_32c = 0; iw_32c < iw_32; iw_32c++) { + for (int ic = 0; ic < (input_channels / 8); ic++) { + for (int i = kernel_height_start; i < kernel_height_end; + i++) { // 1 to 3 + + for (int j = 0; j < kernel_width; j++) { + aie::vector wtsVec = aie::load_v<64>(wtsLine[i]); + wtsLine[i] += 64; + + // auto prev = prev_a[i].extract<32>(1); // prev + // = x0..x3(ci0..ci7) + auto prev = aie::load_v<32>((line[i] - 32)); + auto curr = aie::load_v<32>((line[i])); + line[i] += 32; + auto next = aie::load_v<32>((line[i])); + line[i] += 32; + + for (int x = 0; x < 8; x++) + // chess_unroll_loop() + { + auto tmp1 = aie::concat(curr, next); + auto tprev = aie::concat(zero32, prev); + auto tmp2 = aie::shuffle_up_fill( + tmp1, tprev, 8); // curr = x3..x6(ci0..ci7) + auto tmp3 = aie::shuffle_down( + tmp2, j * 8); // curr = x4..x7(ci0..ci7) to + // x5..x8(ci0..ci7)ss + + prev = curr; + curr = next; + next = aie::load_v<32>(line[i]); + line[i] += 32; // next_prev = x4..x7(ci0..ci7) + + acc_tmp[x].mac(tmp3.extract<32>(0), wtsVec); + } // for(int x=0; x<8; x++) + line[i] -= 320; // (8+2)*32, Reset line buffer ptr to beginning of + // line (after first 4) + } // for(int j=0; j o1 = acc_tmp[x].to_vector(scale); + aie::store_v(output, o1); + output += 32; + acc_tmp[x] = aie::zeros(); + } + // For next 8 activations, reset line buffer and weights + for (int i = kernel_height_start; i < kernel_height_end; i++) { + line[i] -= + (input_channels / 8) * (iw * 8); // length of act to shift back + } + } // for(int iw_32c=0; iw_32c 0) + + // Secondary loop for input_width remainder (iw_32_rem < 8) + if (iw_32_rem > 0) { + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int ic = 0; ic < (input_channels / 8); ic++) { + for (int i = kernel_height_start; i < 
kernel_height_end; + i++) { // 1 to 3 + for (int j = 0; j < kernel_width; j++) { + // New weight every kernel_width + aie::vector wtsVec = aie::load_v<64>(wtsLine[i]); + wtsLine[i] += 64; + // auto prev = prev_a[i].extract<32>(1); // prev = + // x0..x3(ci0..ci7) + auto prev = aie::load_v<32>((line[i] - 32)); + auto curr = aie::load_v<32>((line[i])); + line[i] += 32; + auto next = aie::load_v<32>((line[i])); + line[i] += 32; + + for (int x = 0; x < iw_32_rem; x++) // remainder input width < 8 + // chess_unroll_loop() + { + auto tmp1 = aie::concat(curr, next); + auto tprev = aie::concat(zero32, prev); + auto tmp2 = aie::shuffle_up_fill( + tmp1, tprev, 8); // curr = x3..x6(ci0..ci7) + auto tmp3 = aie::shuffle_down( + tmp2, + j * 8); // curr = x3..x6(ci0..ci7) to x5..x8(ci0..ci7)ss + + prev = curr; + curr = next; + next = aie::load_v<32>(line[i]); + line[i] += 32; // next_prev = x4..x7(ci0..ci7) + + acc_tmp[x].mac(tmp3.extract<32>(0), wtsVec); + } + line[i] -= + (iw_32_rem + 2) * 32; // Reset line buffer ptr to beginning of + // line (after first 4) + } // for(int j=0; j o1 = acc_tmp[x].to_vector(scale); + aie::store_v(output, o1); + output += 32; + acc_tmp[x] = aie::zeros(); // Reset accumulators + } + // Reset line ptr to beginning of input + for (int i = kernel_height_start; i < kernel_height_end; i++) { + line[i] -= (input_channels / 8) * (iw * 8); + } + // Output ptr should be in the right place (next oc/8) + output += (iw * 8) - (iw_32_rem * 32); // 32 = 4*8, shift to next oc/8 + } // for(int oc=0; oc<(output_channels/8); oc++) + // Reset weights and line buffers for right side + for (int i = kernel_height_start; i < kernel_height_end; i++) { + wtsLine[i] -= (output_channels / 8) * (input_channels / 8) * + kernel_width * kernel_height * + 64; // kernel_width*kernel_height*8*8 + line[i] += + iw_32_rem * 32; // shift to beginnign of right data, iw_32_rem*4*8 + } + // shift back so we're aligned with beginning of first oc/8 (rightmost 4 + // data) + output -= 
(output_channels / 8) * (iw * 8) - (iw_32_rem * 32); + + } // if (iw_32_rem > 0) { + + // -------------------------------------------------------------------- + // Right patterns + // -------------------------------------------------------------------- + // + // -------------------------------------------------------------------- + { + MMUL4x8x8 acc1 = aie::zeros(); + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int ic = 0; ic < (input_channels / 8); ic++) { + for (int i = kernel_height_start; i < kernel_height_end; i++) + chess_prepare_for_pipelining chess_loop_range(2, ) + // chess_unroll_loop() + { + // Load next set of data for input A (matrix row), need stride info + // or line1/2/3 pointer + // TODO, did not store previous so need to load it again + // in_a = aie::load_v<64>(line[i]-32); + auto tmp_a1 = + aie::load_v<32>(line[i] - 32); // act 24..27 (ic0..7 for each) + auto tmp_a2 = + aie::load_v<32>(line[i]); // act 28..31 (ic0..7 for each) + auto in_a = aie::concat(tmp_a1, tmp_a2); +#ifdef BORDER_REPLICATE + tmp_a2 = aie::shuffle_down(tmp_a2, 24); + tmp_a.insert<32>(0, tmp_a2); +#else + auto tmp_a = aie::zeros(); +#endif + // shift by 32-8 (fill 32 then shift up by 8) + in_a = aie::shuffle_down_fill(in_a, tmp_a, 24); // act 27..31 - - - + + for (int j = 0; j < kernel_width; j++) + // chess_unroll_loop() + { + auto in_b = aie::load_v<64>(wtsLine[i]); + wtsLine[i] += 64; // wts ic0..7(oc0..7) + acc1.mac(in_a.extract<32>(0), in_b); + // Shift input A by 1 row (1x8) which is by 1 (the 8 is the ic=8) + in_a = aie::shuffle_down(in_a, 8); + } + wtsLine[i] += ((kernel_height - 1) * kernel_width * + 64); // Move to next ic/8 position + // No need to load next set of weights because next row of weights + // immediately follows + line[i] += (iw * 8); // Increment to next ic/8 position (reset at + // end of outermost loop) + } // for(int i=kernel_height_start; i o1 = acc1.to_vector(scale); + aie::store_v(output, o1); + output += iw * 8; // Shift to next 
oc/8 + + acc1 = aie::zeros(); + + for (int i = kernel_height_start; i < kernel_height_end; i++) { + line[i] -= (input_channels / 8) * + (iw * 8); // shift back to beginning of this section + } + } // for(int oc=0; oc<(output_channels/8); oc++) { + } + event1(); +} + +#else // UINT8_ACT + +//***************************************************************************** +// conv2d 3x3 - vector +// act: uint8, wts: int8, out: uint8 +//***************************************************************************** +// Takes 3 input lines and computes 1 output line +void conv2dk3_ui8_vector(uint8_t *line0, uint8_t *line1, uint8_t *line2, + int8_t *wts, uint8_t *output, + const int32_t input_width, + const int32_t input_channels, + const int32_t output_channels, + const int32_t kernel_width, + const int32_t kernel_height, const int32_t check, + const int scale, const int channel_offset) { + event0(); + + // Compute + using MMUL4x8x8 = aie::mmul<4, 8, 8, uint8, int8>; + ::aie::set_saturation( + aie::saturation_mode::saturate); // Needed to saturate properly to uint8 + ::aie::set_rounding( + aie::rounding_mode::positive_inf); // Needed to saturate properly to uint8 + + constexpr unsigned VecFactor = 16; + + // const int scale = 11; + + // basic MMUL intrinisic needed is k x ic x oc + // k is number of inputs processed at a time + // So if ic=8, oc=4, then k=8 and we use 8x8x4 + const unsigned k = + 256 / (input_channels * output_channels); // 8 inputs per vector output + + aie::vector zero32 = aie::zeros(); + + // aie::vector prev_a[3], + // aie::vector in_a; + // aie::vector tmp_a; + // aie::vector tmp_a1, tmp_a2; + // aie::vector in_b; + + uint8_t *restrict line[3]; + // uint8_t *line[3]; + line[0] = line0; + line[1] = line1; + line[2] = line2; + + int8_t *restrict wtsLine[3]; + // int8_t *wtsLine[3]; + // oc,ic,ky,kx,ic8,oc8 + wtsLine[0] = wts + (channel_offset / 8) * (input_channels / 8) * + kernel_height * kernel_width * 64; + wtsLine[1] = wts + + (channel_offset / 8) * 
(input_channels / 8) * kernel_height * + kernel_width * 64 + + kernel_width * 64; // next kernel line is always 8*8 away + wtsLine[2] = wts + + (channel_offset / 8) * (input_channels / 8) * kernel_height * + kernel_width * 64 + + 2 * kernel_width * 64; // next kernel line is always 8*8 away + + MMUL4x8x8 acc_tmp[8]; + + // Zero accumulators used for storing partial results + // for(int x=0; x(); + } + + // TODO temporary workaround. When assigned to input_width, the results are + // wrong. ??? + const int iw = 32; + // const int32_t iw = input_width; + + // const int iw_32 = ((input_width/4)-2)/8; + // const int iw_32 = ((iw/4)-2)/8; + // const int iw_32 = ((32/4)-2)/8; + const int iw_32 = 0; + + // const int iw_32_rem = ((input_width/4)-2) % 8; + // const int iw_32_rem = ((iw/4)-2) % 8; + // const int iw_32_rem = ((32/4)-2) % 8; + const int iw_32_rem = 6; + + // output += (channel_offset*iw); // channel_offset/8*iw*8 + + int kernel_height_start; + int kernel_height_end; + + // int kernel_height_start, kernel_height_end; +#ifdef BORDER_REPLICATE + kernel_height_start = 0; + kernel_height_end = kernel_height; + // constexpr int kernel_height_start = 0; + // constexpr int kernel_height_end = kernel_height; +#else // Zero border for 3x3 + // constexpr int kernel_height_start = 0; + // constexpr int kernel_height_end = kernel_height-1; + + // if(check == top) + // idx_adj = 1; + + // We skip top or bottom row for zero border + switch (check) { + case top: + kernel_height_start = 1; + kernel_height_end = kernel_height; + break; + case middle: + kernel_height_start = 0; + kernel_height_end = kernel_height; + break; + case bottom: + kernel_height_start = 0; + kernel_height_end = kernel_height - 1; + break; + } +#endif + + // -------------------------------------------------------------------- + // Leftmost pattern + // -------------------------------------------------------------------- + // Computes leftmost 4 inputs for all input/output channels. 
+ // This shifts the leftmost input data by 1 (x8 channels) for 3x3 to + // account for border. Border replicate copies the leftmost input while + // 0 border shifts in 0's. If we need to support larger than 3x3, the + // replicate logic would need to be changed. + // -------------------------------------------------------------------- + { + // in_b = aie::load_v<64>(wtsLine[kernel_height_start]); + // wtsLine[kernel_height_start] +=64; // wts ic0..7(oc0..7) + + MMUL4x8x8 acc1 = aie::zeros(); + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int ic = 0; ic < (input_channels / 8); ic++) + chess_loop_range(2, ) { + for (int i = kernel_height_start; i < kernel_height_end; i++) + chess_prepare_for_pipelining chess_loop_range(2, ) + // chess_unroll_loop() + { + // Load input data [a0 a1 a2 a3 a4 a5 a6 a7] where each position + // has data for 8 channels + auto tmp_a1 = aie::load_v<32>(line[i]); + line[i] += 32; // act 0..3 (ic0..7 for each) + auto tmp_a2 = + aie::load_v<32>(line[i]); // act 4..7 (ic0..7 for each) + auto in_a = aie::concat(tmp_a1, tmp_a2); + + aie::vector tmp_a; +#ifdef BORDER_REPLICATE + tmp_a1 = aie::shuffle_up(tmp_a1, 24); + tmp_a.insert<32>(1, tmp_a1); +#else + tmp_a = aie::zeros(); +#endif + // Shift right 1 input (8 channels) [- a0 a1 a2 a3 a4 a5 a6] where + // - is either a0 or 0's + in_a = aie::shuffle_up_fill(in_a, tmp_a, 8); + + // Previous buffer stores shifted data, [- - - - a0 a1 a2 a3] + // where - is + // prev_a[i] = aie::shuffle_up(in_a, 24); // Shift right (4-1)*8 + + // prev_a[i] = in_a; + // prev_a[i] = aie::shuffle_up(prev_a[i], 24); // Shift right + // (4-1)*8 + + // For kernel width, we load 64 weights (8 ics x 8 ocs) and + // multiply it with the act buffer. acc[32] += in_a[32] * wts[64] + // We then shift the buffer left by 1 data position (8 channels). 
+ for (int j = 0; j < kernel_width; j++) + chess_loop_range(3, 3) // TODO Assume 3x3 + chess_unroll_loop() { + auto in_b = aie::load_v<64>(wtsLine[i]); + wtsLine[i] += 64; // wts ic0..7(oc0..7) + acc1.mac(in_a.extract<32>(0), in_b); + // Shift input A by 1 row (1x8) which is by 1 (the 8 is the + // ic=8) + in_a = aie::shuffle_down(in_a, 8); + } + wtsLine[i] -= + (kernel_width * 64); // Reset weight pointer for this line + // wtsLine[i] += ((kernel_height-1)*kernel_width*64); // Move to + // next ic/8 position No need to load next set of weights because + // next row of weights immediately follows line[i] += (iw*4)*8; // + // Increment to next ic/8 position (reset at end of outermost + // loop) + } // for(int i=kernel_height_start; i o1 = acc1.to_vector(scale); + aie::store_v(output, o1); + output += iw * 8; // Shift to next oc/8 offset for left side + + acc1 = aie::zeros(); + + // Shift back to beginning of input + for (int i = kernel_height_start; i < kernel_height_end; i++) + chess_loop_range(2, ) { line[i] -= (input_channels / 8) * (iw * 8); } + + } // for(int oc=0; oc<(output_channels/8); oc++) { + + // Reset output to beginning, then add 4*8 + // Reset wts to beginning of wts + // Reset line to beginning of input, then add 4*8 + output -= (output_channels / 8) * (iw * 8) - 32; + for (int i = kernel_height_start; i < kernel_height_end; i++) + chess_loop_range(2, ) { + wtsLine[i] -= (output_channels / 8) * (input_channels / 8) * + kernel_width * kernel_height * + 64; // kernel_width*kernel_height*8*8 + // line[i] -= (output_channels/8)*(input_channels/8)*(iw*8)-32; // + line[i] += 32; + } + } + + // -------------------------------------------------------------------- + // Middle pattern + // -------------------------------------------------------------------- + // The middle seciton algorithm is different because we want to minimize + // the reloading of weights and activations. 
So instead, we use up to 8 + // accumulators to store partial products with activations being shifted. + // Then for the next kernel position, we reload weights. + // + // H,W,C8 + // -------------------------------------------------------------------- + + // Main loop for when input_width/4-2 > 8 + if (iw_32 > 0) { + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int iw_32c = 0; iw_32c < iw_32; iw_32c++) { + for (int ic = 0; ic < (input_channels / 8); ic++) + chess_loop_range(2, ) { + for (int i = kernel_height_start; i < kernel_height_end; i++) + chess_prepare_for_pipelining chess_loop_range(2, ) { // 1 to 3 + + for (int j = 0; j < kernel_width; j++) + chess_loop_range(3, 3) // TODO Assume 3x3 + chess_unroll_loop() { + aie::vector wtsVec = aie::load_v<64>(wtsLine[i]); + wtsLine[i] += 64; + + // auto prev = prev_a[i].extract<32>(1); + // prev + // = x0..x3(ci0..ci7) + auto prev = aie::load_v<32>((line[i] - 32)); + auto curr = aie::load_v<32>((line[i])); + line[i] += 32; + auto next = aie::load_v<32>((line[i])); + // line[i] += 32; + + auto tprev = aie::concat(zero32, prev); + auto tmp1 = aie::concat(curr, next); + + tmp1 = aie::shuffle_up_fill( + tmp1, tprev, 8); // curr = x3..x6(ci0..ci7) + + tmp1 = aie::shuffle_down( + tmp1, j * 8); // curr = x4..x7(ci0..ci7) to + + // j = 0, 1, 2 + int j1 = j + 1; // 1, 2, 3 + int j2 = j + 3 - (j >> 1) * 4; // 3, 4, 1 + int lineIncr = (j >> 1) * 32; // 0, 0, 32 + + for (int x = 0; x < 8; x++) + chess_unroll_loop() chess_loop_range(8, 8) { + // auto tmp1 = aie::concat(curr, next); + // auto tprev = aie::concat(zero32, prev); + // auto tmp2 = aie::shuffle_up_fill( + // tmp1, tprev, 8); // curr = x3..x6(ci0..ci7) + // auto tmp3 = aie::shuffle_down( + // tmp2, j * 8); // curr = x4..x7(ci0..ci7) to + // // x5..x8(ci0..ci7)ss + + // prev = curr; + // curr = next; + // next = aie::load_v<32>(line[i]); + + // line[i] += 32; // next_prev = x4..x7(ci0..ci7) + + // acc_tmp[x].mac(tmp3.extract<32>(0), wtsVec); + + 
acc_tmp[x].mac(tmp1.extract<32>(0), wtsVec); + + tmp1 = aie::shuffle_down(tmp1, j1 * 8); + tmp1.insert(1, aie::load_v<32>(line[i] + lineIncr)); + line[i] += 32; + tmp1 = aie::shuffle_down(tmp1, j2 * 8); + + } // for(int x=0; x<8; x++) + line[i] -= 320; // (8+2)*32, Reset line buffer ptr to + // beginning of line (after first 4) + } // for(int j=0; j o1 = acc_tmp[x].to_vector(scale); + aie::store_v(output, o1); + output += 32; + acc_tmp[x] = aie::zeros(); + } + // For next 8 activations, reset line buffer and weights + for (int i = kernel_height_start; i < kernel_height_end; i++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + line[i] -= + (input_channels / 8) * (iw * 8); // length of act to shift back + } + } // for(int iw_32c=0; iw_32c 0) + + // Secondary loop for input_width remainder (iw_32_rem < 8) + if (iw_32_rem > 0) { + + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int ic = 0; ic < (input_channels / 8); ic++) + chess_loop_range(2, ) { + for (int i = kernel_height_start; i < kernel_height_end; i++) + chess_prepare_for_pipelining chess_loop_range(2, ) { // 1 to 3 + for (int j = 0; j < kernel_width; j++) + chess_loop_range(3, 3) // TODO Assume 3x3 + chess_unroll_loop() { + // New weight every kernel_width + aie::vector wtsVec = aie::load_v<64>(wtsLine[i]); + wtsLine[i] += 64; + // auto prev = prev_a[i].extract<32>(1); // + // prev = x0..x3(ci0..ci7) + auto prev = aie::load_v<32>((line[i] - 32)); + auto curr = aie::load_v<32>((line[i])); + line[i] += 32; + auto next = aie::load_v<32>((line[i])); + // line[i] += 32; + + auto tprev = aie::concat(zero32, prev); + auto tmp1 = aie::concat(curr, next); + + // j = 0, 1, 2 + int jr0 = (2 - j) >> 1; // 1, 0, 0 + int j0 = (j >> 1); // 0, 0, 1 + int j1 = j + 1; // 1, 2, 3 + int j2 = j + 3 - ((j >> 1) * 4); // 3, 4, 1 + int lineIncr = (j >> 1) * 32; // 0, 0, 32 + + tmp1 = aie::shuffle_up_fill( + tmp1, tprev, jr0 * 8); // curr = x3..x6(ci0..ci7) + + tmp1 = aie::shuffle_down( + tmp1, j0 * 8); // curr 
= x4..x7(ci0..ci7) to + + for (int x = 0; x < iw_32_rem; x++) // remainder input width < + // 8 chess_unroll_loop() + chess_unroll_loop() { + // auto tmp1 = aie::concat(curr, next); + // auto tprev = aie::concat(zero32, prev); + // auto tmp2 = aie::shuffle_up_fill( + // tmp1, tprev, 8); // curr = x3..x6(ci0..ci7) + // auto tmp3 = aie::shuffle_down( + // tmp2, + // j * 8); // curr = x3..x6(ci0..ci7) to + // x5..x8(ci0..ci7)ss + + // prev = curr; + // curr = next; + // next = aie::load_v<32>(line[i]); + // line[i] += 32; // next_prev = x4..x7(ci0..ci7) + + // acc_tmp[x].mac(tmp3.extract<32>(0), wtsVec); + acc_tmp[x].mac(tmp1.extract<32>(0), wtsVec); + + tmp1 = aie::shuffle_down(tmp1, j1 * 8); + tmp1.insert(1, aie::load_v<32>(line[i] + lineIncr)); + line[i] += 32; + tmp1 = aie::shuffle_down(tmp1, j2 * 8); + } + line[i] -= (iw_32_rem + 1) * + 32; // Reset line buffer ptr to beginning of + // (iw_32_rem + 2) * 32; // Reset line buffer ptr to beginning + // of line (after first 4) + } // for(int j=0; j o1 = acc_tmp[x].to_vector(scale); + aie::store_v(output, o1); + output += 32; + acc_tmp[x] = aie::zeros(); // Reset accumulators + } + // Reset line ptr to beginning of input + for (int i = kernel_height_start; i < kernel_height_end; i++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + line[i] -= (input_channels / 8) * (iw * 8); + } + // Output ptr should be in the right place (next oc/8) + output += (iw * 8) - (iw_32_rem * 32); // 32 = 4*8, shift to next oc/8 + } // for(int oc=0; oc<(output_channels/8); oc++) + // Reset weights and line buffers for right side + for (int i = kernel_height_start; i < kernel_height_end; i++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + wtsLine[i] -= (output_channels / 8) * (input_channels / 8) * + kernel_width * kernel_height * + 64; // kernel_width*kernel_height*8*8 + line[i] += + iw_32_rem * 32; // shift to beginnign of right data, iw_32_rem*4*8 + } + // shift back so we're aligned with beginning of first oc/8 
(rightmost 4 + // data) + output -= (output_channels / 8) * (iw * 8) - (iw_32_rem * 32); + + } // if (iw_32_rem > 0) { + + // -------------------------------------------------------------------- + // Right patterns + // -------------------------------------------------------------------- + // + // -------------------------------------------------------------------- + { + MMUL4x8x8 acc1 = aie::zeros(); + for (int oc = 0; oc < (output_channels / 8); oc++) { + for (int ic = 0; ic < (input_channels / 8); ic++) + chess_loop_range(2, ) { + for (int i = kernel_height_start; i < kernel_height_end; i++) + chess_prepare_for_pipelining chess_loop_range(2, ) + // chess_unroll_loop() + { + // Load next set of data for input A (matrix row), need stride + // info or line1/2/3 pointer + // TODO, did not store previous so need to load it again + // in_a = aie::load_v<64>(line[i]-32); + auto tmp_a1 = + aie::load_v<32>(line[i] - 32); // act 24..27 (ic0..7 for each) + auto tmp_a2 = + aie::load_v<32>(line[i]); // act 28..31 (ic0..7 for each) + auto in_a = aie::concat(tmp_a1, tmp_a2); + + aie::vector tmp_a; +#ifdef BORDER_REPLICATE + tmp_a2 = aie::shuffle_down(tmp_a2, 24); + tmp_a.insert<32>(0, tmp_a2); +#else + tmp_a = aie::zeros(); +#endif + // shift by 32-8 (fill 32 then shift up by 8) + in_a = + aie::shuffle_down_fill(in_a, tmp_a, 24); // act 27..31 - - - + + for (int j = 0; j < kernel_width; j++) + chess_loop_range(3, 3) chess_unroll_loop() { + auto in_b = aie::load_v<64>(wtsLine[i]); + wtsLine[i] += 64; // wts ic0..7(oc0..7) + acc1.mac(in_a.extract<32>(0), in_b); + // Shift input A by 1 row (1x8) which is by 1 (the 8 is the + // ic=8) + in_a = aie::shuffle_down(in_a, 8); + } + wtsLine[i] += ((kernel_height - 1) * kernel_width * + 64); // Move to next ic/8 position + // No need to load next set of weights because next row of weights + // immediately follows + line[i] += (iw * 8); // Increment to next ic/8 position (reset at + // end of outermost loop) + } // for(int 
i=kernel_height_start; i o1 = acc1.to_vector(scale); + aie::store_v(output, o1); + output += iw * 8; // Shift to next oc/8 + + acc1 = aie::zeros(); + + for (int i = kernel_height_start; i < kernel_height_end; i++) + chess_prepare_for_pipelining chess_loop_range(2, ) { + line[i] -= (input_channels / 8) * + (iw * 8); // shift back to beginning of this section + } + } // for(int oc=0; oc<(output_channels/8); oc++) { + } + event1(); +} + +#endif // UINT8_ACT + +#endif // Vector + +extern "C" { + +#ifdef SCALAR + +#ifdef INT8_ACT + +void conv2dk3_i8(int8_t *line0, int8_t *line1, int8_t *line2, int8_t *wts, + uint8_t *output, const int32_t input_width, + const int32_t input_channels, const int32_t output_channels, + const int32_t kernel_width, const int32_t kernel_height, + const int32_t check, const int scale, + const int channel_offset) { + conv2dk3_i8_scalar(line0, line1, line2, wts, output, input_width, + input_channels, output_channels, kernel_width, + kernel_height, check, scale, channel_offset); +} + +#else // UINT8_ACT + +void conv2dk3_ui8(uint8_t *line0, uint8_t *line1, uint8_t *line2, int8_t *wts, + uint8_t *output, const int32_t input_width, + const int32_t input_channels, const int32_t output_channels, + const int32_t kernel_width, const int32_t kernel_height, + const int32_t check, const int scale, + const int channel_offset) { + conv2dk3_ui8_scalar(line0, line1, line2, wts, output, input_width, + input_channels, output_channels, kernel_width, + kernel_height, check, scale, channel_offset); +} + +#endif // UINT8_ACT + +#else // Vector + +#ifdef INT8_ACT + +void conv2dk3_i8(int8_t *line0, int8_t *line1, int8_t *line2, int8_t *wts, + uint8_t *output, const int32_t input_width, + const int32_t input_channels, const int32_t output_channels, + const int32_t kernel_width, const int32_t kernel_height, + const int32_t check, const int scale, + const int channel_offset) { + conv2dk3_i8_vector(line0, line1, line2, wts, output, input_width, + input_channels, 
output_channels, kernel_width, + kernel_height, check, scale, channel_offset); +} + +#else // UINT8_ACT + +void conv2dk3_ui8(uint8_t *line0, uint8_t *line1, uint8_t *line2, int8_t *wts, + uint8_t *output, const int32_t input_width, + const int32_t input_channels, const int32_t output_channels, + const int32_t kernel_width, const int32_t kernel_height, + const int32_t check, const int scale, + const int channel_offset) { + conv2dk3_ui8_vector(line0, line1, line2, wts, output, input_width, + input_channels, output_channels, kernel_width, + kernel_height, check, scale, channel_offset); +} + +#endif // UINT8_ACT + +#endif // Vector +} \ No newline at end of file diff --git a/aie_kernels/aie2/conv2dk3.h b/aie_kernels/aie2/conv2dk3.h new file mode 100755 index 0000000000..61a2f8e698 --- /dev/null +++ b/aie_kernels/aie2/conv2dk3.h @@ -0,0 +1,33 @@ +//===- conv2dk3.h -------------------------------------------------*- C++ +//-*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _CONV2DK3_H +#define _CONV2DK3_H + +extern "C" { + +void conv2dk3_i8(int8_t *line0, int8_t *line1, int8_t *line2, int8_t *wts, + uint8_t *output, const int32_t input_width, + const int32_t input_channels, const int32_t output_channels, + const int32_t kernel_width, const int32_t kernel_height, + const int32_t check, const int scale, + const int channel_offset); + +void conv2dk3_ui8(uint8_t *line0, uint8_t *line1, uint8_t *line2, int8_t *wts, + uint8_t *output, const int32_t input_width, + const int32_t input_channels, const int32_t output_channels, + const int32_t kernel_width, const int32_t kernel_height, + const int32_t check, const int scale, + const int channel_offset); + +} // extern "C" + +#endif diff --git a/programming_examples/ml/bottleneck/CMakeLists.txt b/programming_examples/ml/bottleneck/CMakeLists.txt new file mode 100644 index 0000000000..4b897cb29c --- /dev/null +++ b/programming_examples/ml/bottleneck/CMakeLists.txt @@ -0,0 +1,89 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. 
+ +# parameters +# -DBOOST_ROOT: Path to Boost install +# -DOpenCV_DIR: Path to OpenCV install +# -DXRT_INC_DIR: Full path to src/runtime_src/core/include in XRT cloned repo +# -DXRT_LIB_DIR: Path to xrt_coreutil.lib +# -DTARGET_NAME: Target name to be built + +# cmake needs this line +cmake_minimum_required(VERSION 3.1) + +find_program(WSL NAMES powershell.exe) + +if (NOT WSL) + set(BOOST_ROOT /usr/include/boost CACHE STRING "Path to Boost install") + set(OpenCV_DIR /usr/include/opencv4 CACHE STRING "Path to OpenCV install") + set(XRT_INC_DIR /opt/xilinx/xrt/include CACHE STRING "Path to XRT cloned repo") + set(XRT_LIB_DIR /opt/xilinx/xrt/lib CACHE STRING "Path to xrt_coreutil.lib") +else() + set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") + set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install") + set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") + set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") +endif () + +set(EDGEDETECT_WIDTH 1920 CACHE STRING "image width") +set(EDGEDETECT_HEIGHT 1080 CACHE STRING "image height") + +set(TARGET_NAME test CACHE STRING "Target to be built") + +SET (ProjectName ${TARGET_NAME}) +SET (currentTarget ${TARGET_NAME}) + +if ( WSL ) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}) +endif () + +project(${ProjectName}) + +# Find packages +find_package(Boost REQUIRED) +find_package(OpenCV REQUIRED) +message("opencv library paht: ${OpenCV_LIB_PATH}") +message("opencv libs: ${OpenCV_LIBS}") + + +add_executable(${currentTarget} + ${CMAKE_CURRENT_SOURCE_DIR}/../../../utils/OpenCVUtils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../../utils/xrtUtils.cpp + test.cpp +) + +target_compile_definitions(${currentTarget} PUBLIC + EDGEDETECT_WIDTH=${EDGEDETECT_WIDTH} + EDGEDETECT_HEIGHT=${EDGEDETECT_HEIGHT} + DISABLE_ABI_CHECK=1 + ) + +target_include_directories (${currentTarget} 
PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/../../../utils + ${XRT_INC_DIR} + ${OpenCV_INCLUDE_DIRS} + ${Boost_INCLUDE_DIRS} +) + +target_link_directories(${currentTarget} PUBLIC + ${XRT_LIB_DIR} + ${OpenCV_LIB_PATH} + ${Boost_LIBRARY_DIRS} +) + +if (NOT WSL) + target_link_libraries(${currentTarget} PUBLIC + xrt_coreutil + ${OpenCV_LIBS} + boost_program_options + boost_filesystem + ) +else() + target_link_libraries(${currentTarget} PUBLIC + xrt_coreutil + ${OpenCV_LIBS} + ) +endif() diff --git a/programming_examples/ml/bottleneck/Makefile b/programming_examples/ml/bottleneck/Makefile new file mode 100755 index 0000000000..f5c6e4561f --- /dev/null +++ b/programming_examples/ml/bottleneck/Makefile @@ -0,0 +1,40 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. + +include ../../makefile-common + +mlirFileName = aie + +all: build/conv2dk1.o build/conv2dk3.o build/conv2dk1_skip.o build/final.xclbin + +build/${mlirFileName}.mlir: aie2.py + mkdir -p ${@D} + python3 $< > $@ + +insts.txt: build/${mlirFileName}.mlir + aiecc.py -v --aie-only-generate-ipu --ipu-insts-name=$@ $< + +build/conv2dk1.o: ../../../aie_kernels/aie2/conv2dk1.cc + xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ + +build/conv2dk3.o: ../../../aie_kernels/aie2/conv2dk3.cc + xchesscc -d ${CHESSCC2_FLAGS} -DUINT8_ACT -c $< -o $@ + +build/conv2dk1_skip.o: ../../../aie_kernels/aie2/conv2dk1_skip.cc + xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ + +build/final.xclbin: build/${mlirFileName}.mlir + cd build && aiecc.py -v --aie-generate-cdo --aie-generate-ipu --no-compile-host \ + --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + +clean: + rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log .xclbin sim \ + chess* *.o insts.txt \ + *.log aie_partition.json *.bin BOOT.BIN _x test.exe + 
+run_py: + ${powershell} python3 test.py diff --git a/programming_examples/ml/bottleneck/README.md b/programming_examples/ml/bottleneck/README.md new file mode 100644 index 0000000000..144b8e36f2 --- /dev/null +++ b/programming_examples/ml/bottleneck/README.md @@ -0,0 +1,125 @@ + + +# The Bottleneck Block +## Introduction +The bottleneck block is a key component in deep neural network architectures, such as ResNet. It is designed to help address the challenge of training very deep networks by reducing the computational cost while maintaining or improving performance. This README provides an overview of the process and considerations for accelerating a single bottleneck block. + + +## Bottleneck Block Overview +The components and functionality of a standard bottleneck block: + +* Identity Mapping: The core idea behind bottleneck blocks is the concept of identity mapping. Traditional neural network layers aim to learn a mapping from input to output. In contrast, a bottleneck block learns a residual mapping, which is the difference between the input and the output. The original input is then added back to this residual mapping to obtain the final output. Mathematically, this can be represented as `output = input+ residual.` + +* Convolutional Layers: Bottleneck blocks typically consist of one or more convolutional layers. These layers are responsible for learning features from the input data. Convolutional layers apply filters/kernels to the input feature maps to extract relevant patterns and features. The number of filters, kernel size, and other parameters can vary based on the specific architecture and requirements. + +* Activation Function: After each convolutional layer, an activation function is applied to introduce non-linearity into the network. Rectified Linear Unit (ReLU) is commonly used as the activation function due to its simplicity and effectiveness. 
+ +* Batch Normalization: Batch normalization is often employed after convolutional layers to stabilize and accelerate the training process. It normalizes the activations of each layer, making optimization more robust and efficient. + +* Skip Connection (Identity Shortcut): This is the hallmark of bottleneck blocks. The skip connection directly passes the input from one layer to a later layer without any modification. It provides an alternative, shorter path for gradient flow during training. If the input and output dimensions of the bottleneck block are the same, the skip connection directly adds the input to the output. If the dimensions differ, the skip connection might include a 1x1 convolutional layer to adjust the dimensions accordingly. + +* Final Output: The final output of the bottleneck block is obtained by adding the input to the output of the convolutional layers (including any adjustments made to match dimensions via the skip connection). +

+ + + block + +

Bottleneck block has a 1x1 convolution layer for dimension reduction, a 3x3 convolution layer, and a 1x1 convolution layer for dimension restoration. +

+

+ +## Acceleration Techniques +1. Depth-First Implementation: Spatial architectures provide coarse-grained flexibility that allows for tailoring of the dataflow to optimize data movement. By tailoring the dataflow, we implement depth-first schedule for a bottleneck block routing the output of one convolutional operation on an AIE core directly to another convolutional operation on a separate AIE core, all without the need to transfer intermediate results off-chip. This approach effectively minimizes the memory footprint associated with intermediate data, mitigating the overhead of costly off-chip accesses leading to increase in the overall performance. + +2. Data Layout: Optimize activation and weight layout to enhance memory access patterns and enables effective utilization of AIE parallel processing units, ultimately improving the performance of 2D convolution operations. + +3. Kernel Optimzation: To optimize convolution operations on AIE, we vectorize the code using AIE vector intrinsics. We load 8 elements of the input channel into vector registers using vector load intrinsic. We apply the convolution operation on this loaded data, utilizing for enhanced computational efficiency. To ensure accurate convolution results, particularly at the edges of feature maps, we implement zero-padding to handle boundary conditions. This comprehensive approach optimizes convolution processing on AIE, facilitating efficient and accurate feature extraction in neural network applications. Input is 4x8 matrix corresponding to 4 element of row and 8 input channels. + +4. Quantization: We use int8 precision for activationa and weights. At int8 precision, AIE offers the highest compute density with 256 MAC/cycle. + +5. Layer Fused: We perform two levels of fusion. First, we fuse ReLU in convolution using SRS capabilities of AIE. Second, we fuse BatchNorm into convolution weights. 
+ + + +## Data Layout +We need to ensure that the data layout is compatible with efficient SIMD processing and rearrange the input data into a format where contiguous elements represent consecutive X-dimension values for each channel. For more efficient processing, we adopt a channels-last memory ordering, denoted as NYCXC8, to ensure that channels become the densest dimension. Operating on 8 elements simultaneously, we process 8 channels with the same width at once. Subsequently, we traverse the entire width dimension, handling the remaining channels in batches of 8. This process continues row-wise, resulting in our final data layout pattern: NYCXC8. This optimized layout enhances memory access patterns and enables effective utilization of parallel processing units, ultimately improving the performance of 2D convolution operations. This transformation ensures that data can be efficiently loaded into SIMD registers and processed in parallel. + +YCXC8 Input/Output Data Layout: + +In the YCXC8 (with N=1) data layout, the data is organized in memory as follows: + +* Y: Represents the output feature map dimension. +* C: Denotes the number of channels. +* X: Represents the input feature map dimension. +* C8: Indicates that 8 elements of the input channel are processed together. + +OIYXI8O8 Weight Layout: + +We align the weight layout as specified: O,I,Y,X,I8,O8, to match the input image processing. We first load the weight tensor, organizing it to match this layout, where dimensions represent: output channels, input channels, kernel height, kernel width, input channel groups of 8, and output channel groups of 8. By aligning the weight layout in this manner, we enable seamless integration with the input data layout, maximizing parallelism and minimizing memory access overhead. + +In the OIYXI8O8 data layout, the data is organized in memory as follows: + +* O: Denotes the number of output channels. +* I: Denotes the number of input channels. 
+* Y: Represents the kernel height. +* X: Represents the kernel weight. +* I8: Indicates that 8 elements of the input channel are processed together. +* O8: Indicates that 8 elements of the output channel are processed together. + +## Fusing Convolution and Batch Normalization + +We assume the BatchNorm layer is fused into Convoluion Layer. Fusing BatchNorm into convolution involves incorporating the normalization step directly into the convolution operation. This is achieved by modifying the weights of the convolutional filters to include the scaling and shifting factors. Specifically, the weights are adjusted such that the convolution operation performs the normalization, scaling, and shifting in a single step. + +## Fusing ReLU + +Fusing ReLU into the convolution operation can further optimize the implementation by reducing memory bandwidth requirements and computational overhead. ReLU activation function introduces non-linearity by setting negative values to zero and leaving positive values unchanged. Utilize SIMD instructions to efficiently compute ReLU activation in parallel with convolution. After performing the convolution operation, apply ReLU activation function at vector register level. +We use `aie::set_rounding()` and `aie::set_saturation()` to set the rounding and saturation modes for the computed results in the accumulator. Seeting round mode `postitive_inf` rounds halfway towards positive infinity while setting saturation to `aie::saturation_mode::saturate` saturation rounds an uint8 range (0, 255). + +``` +::aie::set_saturation( + aie::saturation_mode::saturate); // Needed to saturate properly to uint8 +::aie::set_rounding( + aie::rounding_mode::positive_inf); // Needed to saturate properly to uint8 +``` +After convolution and ReLU fusion, the output data is generate in YCXC8 layout. Ensure that the output data layout is compatible with subsequent layers or processing steps in the neural network architecture. + + +### Benefits of ReLU Fusion: + +1. 
Reduced Memory Bandwidth: +By fusing ReLU into the convolution operation, unnecessary memory accesses and data transfers associated with separate ReLU computation are eliminated, leading to reduced memory bandwidth requirements. + +2. Improved Performance: +Fusing ReLU reduces the number of instructions executed per element, resulting in improved computational efficiency and overall performance of the convolution operation. + +3. Simplified Code Structure: +Fusing ReLU into the convolution kernel simplifies the code structure and reduces the overhead associated with separate activation function calls, leading to cleaner and more maintainable code. + +4. Enhanced Resource Utilization: +By combining convolution and ReLU operations, computational resources such as CPU cores or SIMD units are utilized more efficiently, maximizing throughput and achieving better resource utilization. + +## Compilation +To compile the design: +``` +make +``` + +To run the design: +``` +make run_py +``` + +### Prerequisites +To install the dependencies, run the following command: +``` +pip install -r requirements.txt + +``` \ No newline at end of file diff --git a/programming_examples/ml/bottleneck/aie2.py b/programming_examples/ml/bottleneck/aie2.py new file mode 100644 index 0000000000..a488ae8ded --- /dev/null +++ b/programming_examples/ml/bottleneck/aie2.py @@ -0,0 +1,639 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. 
+ +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.extras.dialects.ext import memref, arith +from aie.dialects.scf import * +from aie.extras.context import mlir_mod_ctx +from aie.ir import MemRefType, TypeAttr + +import sys + +# tracing definitions +trace_sz_in_bytes = 8192 +trace_sz_in_i32s = trace_sz_in_bytes // 4 +enableTrace = False + +# Define bottleneck layer sizes + +tensorInW = 32 +tensorInH = 32 +tensorInC = 256 + +tensorL1InC = tensorInC +tensorL1OutC = tensorL1InC // 4 + +tensorL2InC = tensorL1OutC +tensorL2OutC = tensorL2InC + +tensorL3InC = tensorL2OutC +tensorL3OutC = tensorL3InC * 4 + + +def bottleneck4AIEs(): + with mlir_mod_ctx() as ctx: + + @device(AIEDevice.ipu) + def deviceBody(): + + # define types + uint8_ty = IntegerType.get_unsigned(8) + int8_ty = IntegerType.get_signless(8) + int16_ty = IntegerType.get_signless(16) + int32_ty = IntegerType.get_signless(32) + + tensorLayer1In_ty = MemRefType.get( + ( + tensorInW, + 1, + tensorL1InC, + ), + int8_ty, + ) + weightsLayer1_ty = MemRefType.get((tensorL1InC * tensorL1OutC,), int8_ty) + tensorLayer1Out_ty = MemRefType.get( + ( + tensorInW, + 1, + tensorL1OutC, + ), + uint8_ty, + ) + + tensorLayer2In_ty = MemRefType.get( + ( + tensorInW, + 1, + tensorL2InC, + ), + uint8_ty, + ) + weightsLayer2_ty = MemRefType.get( + (3 * 3 * tensorL2InC * tensorL2OutC,), int8_ty + ) + tensorLayer2Out_ty = MemRefType.get( + ( + tensorInW, + 1, + tensorL2OutC // 2, + ), + uint8_ty, + ) + + tensorLayer3In_ty = MemRefType.get( + ( + tensorInW, + 1, + tensorL3InC // 2, + ), + uint8_ty, + ) + weightsLayer3_ty = MemRefType.get((tensorL3InC * tensorL3OutC,), int8_ty) + tensorLayer3Out_ty = MemRefType.get( + ( + tensorInW, + 1, + tensorL3OutC, + ), + uint8_ty, + ) + + allWeights_ty = MemRefType.get( + ( + tensorL1InC * tensorL1OutC + + 3 * 3 * tensorL2InC * tensorL2OutC + + tensorL3InC * tensorL3OutC, + ), + int8_ty, + ) + + # kernel definitions + conv2dk1 = external_func( + "conv2dk1_i8", + inputs=[ 
+ tensorLayer1In_ty, + weightsLayer1_ty, + tensorLayer1Out_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + ], + ) + conv2dk3 = external_func( + "conv2dk3_ui8", + inputs=[ + tensorLayer2In_ty, + tensorLayer2In_ty, + tensorLayer2In_ty, + weightsLayer2_ty, + tensorLayer2Out_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + ], + ) + conv2dk1_skip = external_func( + "conv2dk1_skip_i8", + inputs=[ + tensorLayer3In_ty, + tensorLayer3In_ty, + weightsLayer3_ty, + tensorLayer3Out_ty, + tensorLayer1In_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + ], + ) + + ShimTile = tile(0, 0) + MemTile = tile(0, 1) + ComputeTile2 = tile(0, 2) + ComputeTile3 = tile(0, 3) + ComputeTile4 = tile(0, 4) + ComputeTile5 = tile(0, 5) + + if enableTrace: + flow(ComputeTile4, WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1) + + # runtime parameters + + rtpComputeTile2 = Buffer(ComputeTile2, [16], T.i32(), "rtpComputeTile2") + rtpComputeTile3 = Buffer(ComputeTile3, [16], T.i32(), "rtpComputeTile3") + rtpComputeTile4 = Buffer(ComputeTile4, [16], T.i32(), "rtpComputeTile4") + rtpComputeTile5 = Buffer(ComputeTile5, [16], T.i32(), "rtpComputeTile5") + + # set up data movement with OFs + # input tensor (with broadcast for skip connection) + of_inOF_act_L3L2 = object_fifo( + "inOF_act_L3L2", + ShimTile, + [ComputeTile2, MemTile], + [2, 2, 4], + tensorLayer1In_ty, + ) + of_skip_buf = object_fifo( + "skip_buf", MemTile, ComputeTile4, 2, tensorLayer1In_ty + ) + object_fifo_link(of_inOF_act_L3L2, of_skip_buf) + + # weights + inOF_wts_0_L3L2 = object_fifo( + "inOF_wts_0_L3L2", ShimTile, MemTile, 1, allWeights_ty + ) + of_wts_buf_00 = object_fifo( + "wts_buf_00", MemTile, ComputeTile2, 1, weightsLayer1_ty + ) + wts_buf_01 = object_fifo( + "wts_buf_01", + MemTile, + [ComputeTile3, ComputeTile5], + 1, + weightsLayer2_ty, + ) + wts_buf_02 = object_fifo( + "wts_buf_02", MemTile, ComputeTile4, 1, weightsLayer3_ty + ) + 
object_fifo_link(inOF_wts_0_L3L2, [of_wts_buf_00, wts_buf_01, wts_buf_02]) + + # activation tensor + of_act_2_3_5 = object_fifo( + "act_2_3_5", + ComputeTile2, + [ComputeTile3, ComputeTile5], + [2, 4, 4], + tensorLayer1Out_ty, + ) # 1x1 -> 3x3 + act_3_4 = object_fifo( + "act_3_4", ComputeTile3, ComputeTile4, 2, tensorLayer2Out_ty + ) # 3x3 -> 1x1 + act_5_4 = object_fifo( + "act_5_4", ComputeTile5, ComputeTile4, 2, tensorLayer2Out_ty + ) # 3x3 -> 1x1 + + # output tensor + outOFL2L3 = object_fifo( + "outOFL2L3", ComputeTile4, ShimTile, 2, tensorLayer3Out_ty + ) + + # 1x1 conv2d + @core(ComputeTile2, "conv2dk1.o") + def core_body(): + for _ in for_(sys.maxsize): + + # acquire weights once + element0Weights = of_wts_buf_00.acquire(ObjectFifoPort.Consume, 1) + scale = memref.load(rtpComputeTile2, [0]) + for _ in for_(tensorInH): + element0ActivactionsIn = of_inOF_act_L3L2.acquire( + ObjectFifoPort.Consume, 1 + ) + element0ActivactionsOut = of_act_2_3_5.acquire( + ObjectFifoPort.Produce, 1 + ) + res = call( + conv2dk1, + [ + element0ActivactionsIn, + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL1InC, + tensorL1OutC, + scale, + ], + ) + + objectfifo_release(ObjectFifoPort.Consume, "inOF_act_L3L2", 1) + + objectfifo_release(ObjectFifoPort.Produce, "act_2_3_5", 1) + yield_([]) + objectfifo_release(ObjectFifoPort.Consume, "wts_buf_00", 1) + yield_([]) + + # 3x3 conv2d OFM 0-31 + @core(ComputeTile3, "conv2dk3.o") + def core_body(): + scale = 11 + for _ in for_(sys.maxsize): + + # acquire weights and rtps once + element0Weights = wts_buf_01.acquire(ObjectFifoPort.Consume, 1) + # scale = memref.load(rtpComputeTile3, 0) + + # pre-amble: top row + elementActivactionsIn = of_act_2_3_5.acquire( + ObjectFifoPort.Consume, 2 + ) + element0ActivactionsOut = act_3_4.acquire(ObjectFifoPort.Produce, 1) + res = call( + conv2dk3, + [ + elementActivactionsIn[0], + elementActivactionsIn[0], + elementActivactionsIn[1], + element0Weights, + element0ActivactionsOut, + 
tensorInW, + tensorL2InC, + tensorL2OutC, + 3, + 3, + 0, + scale, + 0, + ], + ) + objectfifo_release(ObjectFifoPort.Produce, "act_3_4", 1) + + # middle + for _ in for_(tensorInH - 2): + elementActivactionsIn = of_act_2_3_5.acquire( + ObjectFifoPort.Consume, 3 + ) + element0ActivactionsOut = act_3_4.acquire( + ObjectFifoPort.Produce, 1 + ) + res = call( + conv2dk3, + [ + elementActivactionsIn[0], + elementActivactionsIn[1], + elementActivactionsIn[2], + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL2InC, + tensorL2OutC, + 3, + 3, + 1, + scale, + 0, + ], + ) + + objectfifo_release(ObjectFifoPort.Consume, "act_2_3_5", 1) + objectfifo_release(ObjectFifoPort.Produce, "act_3_4", 1) + yield_([]) + + # last part + elementActivactionsIn = of_act_2_3_5.acquire( + ObjectFifoPort.Consume, 2 + ) + element0ActivactionsOut = act_3_4.acquire(ObjectFifoPort.Produce, 1) + res = call( + conv2dk3, + [ + elementActivactionsIn[0], + elementActivactionsIn[1], + elementActivactionsIn[1], + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL2InC, + tensorL2OutC, + 3, + 3, + 2, + scale, + 0, + ], + ) + + objectfifo_release(ObjectFifoPort.Consume, "act_2_3_5", 2) + objectfifo_release(ObjectFifoPort.Produce, "act_3_4", 1) + + objectfifo_release(ObjectFifoPort.Consume, "wts_buf_01", 1) + yield_([]) + + # 3x3 conv2d OFM 32-63 + @core(ComputeTile5, "conv2dk3.o") + def core_body(): + scale = 11 + for _ in for_(sys.maxsize): + + # acquire weights and rtps once + element0Weights = wts_buf_01.acquire(ObjectFifoPort.Consume, 1) + # scale = memref.load(rtpComputeTile5, 0) + + # pre-amble: top row + elementActivactionsIn = of_act_2_3_5.acquire( + ObjectFifoPort.Consume, 2 + ) + element0ActivactionsOut = act_5_4.acquire(ObjectFifoPort.Produce, 1) + res = call( + conv2dk3, + [ + elementActivactionsIn[0], + elementActivactionsIn[0], + elementActivactionsIn[1], + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL2InC, + tensorL2OutC, + 3, + 3, + 0, + 
scale, + tensorL2OutC // 2, + ], + ) + + objectfifo_release(ObjectFifoPort.Produce, "act_5_4", 1) + + # middle + for _ in for_(tensorInH - 2): + elementActivactionsIn = of_act_2_3_5.acquire( + ObjectFifoPort.Consume, 3 + ) + element0ActivactionsOut = act_5_4.acquire( + ObjectFifoPort.Produce, 1 + ) + res = call( + conv2dk3, + [ + elementActivactionsIn[0], + elementActivactionsIn[1], + elementActivactionsIn[2], + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL2InC, + tensorL2OutC, + 3, + 3, + 1, + scale, + tensorL2OutC // 2, + ], + ) + + objectfifo_release(ObjectFifoPort.Consume, "act_2_3_5", 1) + objectfifo_release(ObjectFifoPort.Produce, "act_5_4", 1) + yield_([]) + + # last part + elementActivactionsIn = of_act_2_3_5.acquire( + ObjectFifoPort.Consume, 2 + ) + element0ActivactionsOut = act_5_4.acquire(ObjectFifoPort.Produce, 1) + res = call( + conv2dk3, + [ + elementActivactionsIn[0], + elementActivactionsIn[1], + elementActivactionsIn[1], + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL2InC, + tensorL2OutC, + 3, + 3, + 2, + scale, + tensorL2OutC // 2, + ], + ) + objectfifo_release(ObjectFifoPort.Consume, "act_2_3_5", 2) + objectfifo_release(ObjectFifoPort.Produce, "act_5_4", 1) + objectfifo_release(ObjectFifoPort.Consume, "wts_buf_01", 1) + yield_([]) + + # # 1x1 conv2d and add skip + @core(ComputeTile4, "conv2dk1_skip.o") + def core_body(): + for _ in for_(sys.maxsize): + + # acquire weights and rtps once + element0Weights = wts_buf_02.acquire(ObjectFifoPort.Consume, 1) + scale = memref.load(rtpComputeTile4, [0]) + skipScale = memref.load(rtpComputeTile4, [1]) + + for _ in for_(tensorInH): + element0ActivactionsIn = act_3_4.acquire( + ObjectFifoPort.Consume, 1 + ) + element1ActivactionsIn = act_5_4.acquire( + ObjectFifoPort.Consume, 1 + ) + elementSkipsIn = of_skip_buf.acquire(ObjectFifoPort.Consume, 1) + elementActivactionsOut = outOFL2L3.acquire( + ObjectFifoPort.Produce, 1 + ) + + call( + conv2dk1_skip, + [ + 
element0ActivactionsIn, + element1ActivactionsIn, + element0Weights, + elementActivactionsOut, + elementSkipsIn, + tensorInW, + tensorL3InC, + tensorL3OutC, + scale, + skipScale, + ], + ) + objectfifo_release(ObjectFifoPort.Produce, "outOFL2L3", 1) + objectfifo_release(ObjectFifoPort.Consume, "act_3_4", 1) + objectfifo_release(ObjectFifoPort.Consume, "act_5_4", 1) + objectfifo_release(ObjectFifoPort.Consume, "skip_buf", 1) + yield_([]) + objectfifo_release(ObjectFifoPort.Consume, "wts_buf_02", 1) + yield_([]) + + # instruction stream generation + activationsInSize32b = (tensorInW * tensorInH * tensorInC) // 4 + acitivationsOutSize32b = activationsInSize32b + totalWeightsSize32b = ( + tensorL1InC * tensorL1OutC + + 3 * 3 * tensorL2InC * tensorL2OutC + + tensorL3InC * tensorL3OutC + ) // 4 + + activationsInL3_ty = MemRefType.get((activationsInSize32b,), int32_ty) + weightsInL3_ty = MemRefType.get((totalWeightsSize32b,), int32_ty) + + @FuncOp.from_py_func(activationsInL3_ty, weightsInL3_ty, activationsInL3_ty) + def sequence(inputFromL3, weightsFromL3, outputToL3): + + if enableTrace: + # Trace output + + # Trace_Event0, Trace_Event1: Select which events to trace. + # Note that the event buffers only appear to be transferred to DDR in + # bursts of 256 bytes. If less than 256 bytes are written, you may not + # see trace output, or only see it on the next iteration of your + # kernel invocation, as the buffer gets filled up. Note that, even + # though events are encoded as 4 byte words, it may take more than 64 + # events to fill the buffer to 256 bytes and cause a flush, since + # multiple repeating events can be 'compressed' by the trace mechanism. + # In order to always generate sufficient events, we add the "assert + # TRUE" event to one slot, which fires every cycle, and thus fills our + # buffer quickly. 
+ + # Some events: + # TRUE (0x01) + # STREAM_STALL (0x18) + # LOCK_STALL (0x1A) + # EVENTS_CORE_INSTR_EVENT_1 (0x22) + # EVENTS_CORE_INSTR_EVENT_0 (0x21) + # INSTR_VECTOR (0x25) Core executes a vecotr MAC, ADD or compare instruction + # INSTR_LOCK_ACQUIRE_REQ (0x2C) Core executes a lock .acquire instruction + # INSTR_LOCK_.release_REQ (0x2D) Core executes a lock .release instruction + # EVENTS_CORE_PORT_RUNNING_1 (0x4F) + # EVENTS_CORE_PORT_RUNNING_0 (0x4B) + + # Trace_Event0 (4 slots) + ipu_write32(0, 4, 0x340E0, 0x4B222125) + # Trace_Event1 (4 slots) + ipu_write32(0, 4, 0x340E4, 0x2D2C1A4F) + + # Event slots as configured above: + # 0: Kernel executes vector instruction + # 1: Event 0 -- Kernel starts + # 2: Event 1 -- Kernel done + # 3: Port_Running_0 + # 4: Port_Running_1 + # 5: Lock Stall + # 6: Lock .acquire Instr + # 7: Lock .release Instr + + # Stream_Switch_Event_Port_Selection_0 + # This is necessary to capture the Port_Running_0 and Port_Running_1 events + ipu_write32(0, 4, 0x3FF00, 0x121) + + # Trace_Control0: Define trace start and stop triggers. Set start event TRUE. + ipu_write32(0, 4, 0x340D0, 0x10000) + + # Start trace copy out. 
+ ipu_writebd_shimtile( + bd_id=3, + buffer_length=trace_sz_in_i32s, + buffer_offset=acitivationsOutSize32b, + enable_packet=0, + out_of_order_id=0, + packet_id=0, + packet_type=0, + column=0, + column_num=1, + d0_stepsize=0, + d0_wrap=0, + d1_stepsize=0, + d1_wrap=0, + d2_stepsize=0, + ddr_id=2, + iteration_current=0, + iteration_stepsize=0, + iteration_wrap=0, + lock_acq_enable=0, + lock_acq_id=0, + lock_acq_val=0, + lock_rel_id=0, + lock_rel_val=0, + next_bd=0, + use_next_bd=0, + valid_bd=1, + ) + ipu_write32(0, 2, 0x1D20C, 0x3) + + # write RTP parameters + IpuWriteRTPOp( + "rtpComputeTile2", col=0, row=2, index=0, value=1 + ) # scale + IpuWriteRTPOp( + "rtpComputeTile3", col=0, row=3, index=0, value=1 + ) # scale + IpuWriteRTPOp( + "rtpComputeTile5", col=0, row=5, index=0, value=1 + ) # scale + IpuWriteRTPOp( + "rtpComputeTile4", col=0, row=4, index=0, value=1 + ) # scale: conv1x1 with the same scale as the input so we match the scaling factor of output after conv1x1 and the initial input + IpuWriteRTPOp( + "rtpComputeTile4", col=0, row=4, index=1, value=0 + ) # skip_scale + + ipu_dma_memcpy_nd( + metadata="inOF_act_L3L2", + bd_id=0, + mem=inputFromL3, + sizes=[1, 1, 1, activationsInSize32b], + ) + ipu_dma_memcpy_nd( + metadata="outOFL2L3", + bd_id=2, + mem=outputToL3, + sizes=[1, 1, 1, acitivationsOutSize32b], + ) + ipu_dma_memcpy_nd( + metadata="inOF_wts_0_L3L2", + bd_id=1, + mem=weightsFromL3, + sizes=[1, 1, 1, totalWeightsSize32b], + ) + + ipu_sync(column=0, row=0, direction=0, channel=0) + + print(ctx.module) + + +bottleneck4AIEs() diff --git a/programming_examples/ml/bottleneck/bottleneck_block.png b/programming_examples/ml/bottleneck/bottleneck_block.png new file mode 100644 index 0000000000000000000000000000000000000000..d5e88bbbd17e27317d2c6cebe43e141af7fe37ad GIT binary patch literal 227382 zcmZVl1CVCXuP+R@ZM%EgcK5Vx+qP}nwlQsc+O}=mdfI;Hf9|>Wocg|9m0HYD-~Z1xaCHA_XUV zGfNv&ARvjz)O1J{<#UvP)1AkJITWM>6fe|bQ~{~8!WT0VQedG-CM1=inFE+wN^0VQ 
z+Ow#DAZuU~x(M*iKWiUBv@jPP8uf;rm>eP7zNg*pJ2@=oJu)ZxnLj5#x0yh5nLLrC zu&NMa7^hN2AYGc!k&?6vKzoAFXn`OIJbrN@sL04588U0VlbzZDF&md^MeW~TA3xO8 zAchEpK#_eMq6veuBd?65!ak9#m_V(#54iMWsWAZ0m`tT=!5G3ArwtCoSf>fB!+T>U zCPh9hYoG*6o;+6g0JD%cP0vqOTEuBgW84M-(h;l=ID+-{2QVyS*M7L@L<63z{9Hgf zkEQ=)Ch_d}Vad3s4}-DLGy1Vaqet}j?C5Z45=H< z>WLAglap=8C$`yL5HUFx$S)8LB$($tAdr}YUnmc6Coq_Cd5U+L6q=;yB_*eg9BXu_ zjcrCfPpoOVvALjqXSw+NDE2oo+oB@5Jm=a<;CoX2IrbF3$ZPgq2224?U&XUmYyn!p z0^U^mv3e9lKq&b?XiX+_iu)6C(x5N^ce2{9D@o+o(ZOiMn9ZzWNQ=C!L3Dg$6pQh4*ZCqIKcM6*nznXLmV1Cczb<&EqQHu zmHO24Vflvp=IX)Z5BZTQKy8Lj15*b{PY{qGp(R2?o`Cj-5{EK}{tf*V;x0n0m}?>B zLJom~8i_qLJk&B|J49~8-H^Q|jz^v-kwq$<#7&hmL~@)IC7~_(AweOzD)Ci}Xae3y zupW0MFGn0bPBVUdY3~8=VuFJG0{x=Dlmv_Gbqa;{0|q=p^+MxWRkOgS!`Y+Bi)xCN7d9;2 zE$z=-I-s|s^F(G#>WWt9?26BdCsYiowMu-IKH8odp@l|OD78s7jcO%&$3YVT)RN00%kruPTIC(m zPt8_@prec?uZ*uTsxjL!O)-AtiIx*b%q$sXQ(sf`Cka`VS;1O48$vDAEr(XT7aXpr zuC-oiUKQ_xPudsxrDrnxGRSh8EG3Q;EmXH`rB)u6P;60bS*&(eJ_{z6@0Mm(c5^oK zKINgSjdM{;S_Lh-m(?rL6;l;<%5TciWdu21^0>=?=i(Ohs=K8>8lUUqYUKjrM9iyZ zEORkCE;{r&usgC{zi^MZB^fvM=@=3YMZ&!LM%d>xvZbnY2Osq`M zoaEHx)arE8hMYU7JI!||bnoE0aq2i@Qe2&4RcAH0*1tou-Lzx56Ml*@_g7AXw^)8> zAor|qnRh(oB+2@rcszl;uY8_)?5Qz>Fhk%fL0uW1vmAsu;b}>0Z0piT#EUnfdOjK7 zbq~ib(5}%gJ-=$NMz2(__&3D2bYOoVE{MIeYnzW9zB}a@r-kHP zrh%4&i-YOiAyPDwrGz#m4h~ue)1zR9V+Kbx8QwtqKJFl)ScceC{3}@!7B;db`Wxv4 zdnHjNOBOQk?+(6q-57Tn%c(DoCJjv+@f@YC>b8!ym;0W3(|ca%MbtD>9jPTLG_t?s ziAe`Z&!q6PkJ;^>WAznTn16Bdwe!8PFyyEY z;>zups+7Uq`@U+w&VuU&$p#4)cGkdA`FY#+NHSQj;(m zi;sLv-A!#w*=ycw%B~+>9bFX}sXCUvPVsDe!k)TH(~^lM<-OHD{GKIDf{=bWBI0av z)^g5Vb8yi#Bma)7q_RRf=g`hZvA8+y{9B*WopQG!v_tf7Nf~kRAJ?k+l_za} z^-}enYCx@3!Dh>~-X^a5*%jsb{kiyjyt^C3l}#;E1=X(TR%+|z_)Xh( zE`Ni6(>5tojv1ooPOA-xa?V- z-QL@2b78qMd#PE|cDI}D*mf3oFyFIV*;ao6+{A9_e+S$ZycPlm0pc6+R(>o$H5H>3 zryh0ox5uON@wYGcA9+7{4)|?IG`8K+1=g(3D5ce1g`o7zD1bkGYtB;-)W(j0&Lkc(}3$NJe{( z?+~%`4}TV3_~~NITm$?!{DJY$!3vfSY|8$%uWF_ml4i29K-B+uC?Mc)OCYd+9PmHF z`A7e{+@vrd$bT!!KN2Yb`M+A=?*h>O%Y(N5$52pNSW@!es%-3JYHH_fVefK*bc*-S 
z)rzHxhKq)*441LJExnn^GQx`)b4_g~MXD$z3;{USX`p5rA z%s@=^UnVZryu=!^3Pi&8PNqcc^o;b3#C$MBL_|DJCT3hpB4Ymy|M$jAY~kYKz{SAe z?(R#n6M!&Y9%@IQhSRL`u zQyWbYOWS|x`4@walZ}bzzwH12H2)Lve<3yg2g%07_`i|=OY{GWRCP9W61KPf7t)3A ze=76e;Qy`sZy*oDe|G-AGVy=X{9n?4dFF%RVfcSF zP!(O_I~Rar0~QjJC?tO*rU!BRN^Gg#)?MvMb6ur(1OC`hU)6QB?L4dXr3)x8FL)&G zM%4;HLXkpQc-Z;sy`Ad5?Vbw-0yazXzTL`XF`vn4x1X8H%w#b{$k8z8bWm{!$ZwoY z8G^=i91s~8c-(iN3`c?n#R`hoYCS~6!4uG|>W`;H%Nd4<&w*0@uBUz_*{7(JnIpIr z5rv2%)bM1&{t1ZxGy`J5VSoD4+A zweO4KM+=(2ss#(cFOSV>Q%YEH+<7SOB$p31E3gT>O-=W;GrkKzf$qOvcs+e7YztBC zsOEV`*JX|bKj)PWu^C=%_8$QG->+yQ;E~}O9QMoO3BJ^LV4zQWOhAVhB}CrY%Q^8D29 zcWVfRzjwdrmH`|#&95E$KAo%~(}#^X9o9Je?*GKsm>4`pCenjN#sTK%LL|xby6=As zDOy*c6*vV6JWQAVF5AlNmqqLo{wzB_UXnQlKfe-Lr~2+6l&=H`e?Yxv!XRU{NWb;~ z(jyrsDCEc~T>C0KRE-BWNabRyhjEd;(xdahu!va+dtYjY_C?=@C>ezMt%@Cm9|Z|4 zlaO*INEi}4+?5#_B1g?Z{|fgMX5x$e*j2!wf#_#rY>}WGBi!|Y3U$*A@?w4Ptl`y@ zdL&pvcYgs}s znBN#vS}F_;3veZMBGlGlU-Z*&5eXAkNLI70mL*|8Qb7Aab4sKJfvW$FX>+b=-_;xx zfyvsW-6s|$TFGk;#&GZ1o=w3_fjIBTY_jM5E5?x00J|gj0m%XcRUFNv%&n1Qc}NfC zBG4yLV#t*74M^Nj(wrc`^{%jzy`U#asBR@WJIf(VJnQOiS0@nhk02#DFBi}&`|H=a zGbBv67inkZxK0{u=mKD{Xcm8AZFS=`0R6LNRyq2f(TOMto^E*vPf|>+05}SqZ675R zm~s<4fQ4t#X z<_LNex0Es9WGn|isBecwPSgH3Di*fOm@W9D8pH6PQe7-SY-OZ`+sor2lD}%I&6G9+ zuG4QRNlWZXv@Kx7BI66`hgUF3Ik~j=r#~tAwBVl8jSWSpv_+%y24njS_w#~*=%3=^ zycHNKx0V3}P3MwS@68d8(eC4xn0o@e8b z#h!2-LPHIRwzR`!Wipi$e<%3A?>nZlIz@0s*!nwyEY{mBv$vQY4Y8N%a^t?AIehy* z9XQJ&DXKw;2NThwPsc!Cc**IKm5Nc6>gt(=2s-@BP0UCu?h1hT6iyo_L_vMfbmn|$ zbAQW+@dC7iEjEOb}_o2&ANsNnKHpw;r+Em2gJhaeiHz;AyY7jM@&g@23XN=rCJ!h6lN?CQ} zupO-?t+ywWlC!FE$UUtJq2H#I!LJ2^mUCRvLdF3hycUXrW!|7az9St8gVFN~RXOhW z8ciyhJNToU+602G#v<}u+pH|8fpl$Dg!F3dde$isHX!%9tJ~*xCJI$+F%GjeUr0wD zs+@4V>^K@1OufXshAY=TrgIbiTviCQji#@+XQv{@#gkDGI$%H2!(xf?;XbQTZMm6m zzH4&t6VGsm_1@S((w1tal!WE)xxi~-BUZy54tqHRavN^>)j`c~S?rzIQ&F(-1bH?d z(T~j)#&}<6blg>wWrQiy#sv}}dd%g&OT~6*Cu5Frr&dK(xQ`D*?_pgm6?GofapT@- z5^S0t7Ls3<`+VP~2Xlp#siY!HpEGamS6*kW`(}1ku7RSJuW2B6j0TmDiZsu+X{w-& 
z+u9?Rm$D%Z7r=mcNNJ()jI9AvgToxCmxJa?5RCcCy5mR+YCYqbchl)J2~7~I1)X{} zBxSxI2y(4<+c`JeotG>$EQ^-Q#61u?N=`JVHHm~A?VDS>gnsF&0=8CTTavl1Eu(=& z7GuHr6|Um^kX_`UWF-@Mb-?X$n4heSCgo|KSe8C)n#ICp&kG%*&X$=Ow3ZdN$IGL) ziLUJ`^LurIr5ua^Si!nN*RRrtZu-Q8JT4=B#$yW4i#PQW?bW!5@r23RHi#(e z4o1!jfBUBv+w5Q1*H{e9)_Q;WRCTM2rSDoGNXl*4yH@(EsV%BZhyP?|)hc(oM9N#x zNsik-`d&;?B_YPPTuenm&Ror^(`RSn%DHkBM^rA9xOB>3JroYOU#w~7pXNFLWeoxu zof{$gU8t`7ei3Sk;%p4ex;9nT8EEGvmciPkez~{J&|pZaRqPuMXPKG-rf*ykx3`4E zX}kkzRE&yki{;dSYG8=7N!U+}FP! zP?^3x5>TTtH%Xo;r4ho&h!X4mq?Y;kB`+nh{(yF0k+<(kxHroqTZaUzJrXIsrRxhNzeE(M z5XXF(#fU~JEQ}!TLVe8ZW(Tf5(uWc1YS#l8E@86W$b1KGAuNs1#p1Tmv(#?v$Hqpd z^}j7heVfxlT*+5JH)ttgD-$r)+=0Pa#uPrqu+&Jt52i34umt=+^eA@1C4S!U-Hcab0+SWqIwO4SM3t1_MkV# z_yQ)L4GQZt+0;pfUN!O=4O;w-GQ$SF6k5#=71Ym@py*=z-JYb>R5EN!OBB@D;gYiq zyNON8CbQ|J;|!G7&8i=jwdeD50o-2?Wj-XM?w3URUmVgicx$lh^(2>WEQVHai+b%MIfeGTIWJ zOt{X8iu!)En~YQDhuHn|rNpHMFz9H3TdcCWUEq|(;>Qv4W;3sfdVaUC=P3krr1@O0 zE|J2)_%pt7A(6K9r+N-0Bf|#ipxY^t4<^ohJQ>mK9H1F&c|Q9k2f)$q^;fc$e{GZ3 z7pFgM$wFL^Pd>hSm@R8_X%gYB*{$4`_cg#GLJY0&=g^rTau3`zEXzSrS~I<7JQ5?` zVQ0k)wm3&Dgv?}1fR10SNZWN970_xz@%8z>&SNS`#ru_-VVhY{xzSS?Yzw3fcgheU zV~bx#>tXfF>ONSauplzz0}a300h&(7kfur$&zj<*3X?Q4yJg;=xOGQ-`6YL}KJeRb z^E!`n^z*^@cQrR!>?y9f-I7ota!Fga{K~rS4x$@PZs6KavK zn3z2j>@zvM!QJBOok92Qb!(jaV7|4?D?~l&ISSyD|Bm&KP`juYK`8QNviqjdf*|K| zlUo|UO|i_5UHgAF*=%>HZZyj#%I|uLVbIeEMTgB2YA?zJn`LpF817_>>$;)+DMnsO zI)mDx1{@E%@2dStucl3iw)fQ8aF-HU)I`|F4Xl7q`Q6oE;&xHMC|ZJ^l(Zvd=JVBc zbZWcExR^^ku+0hOWYXp)TieBMRoW~gnfmz$h4VaQM@dtaN;I_8`Dwc?zu7r-pGwRN z$*i)IOm9&A-Q%3V^~O3`NHfVcM5jqCHstVtyRJhZ>vvh@whI9gSUA86{Ju0Vp{)O6 z|0J0n3Ob)Y=0T{dp*FA@__iAhE&L2&=1*Q_+iZ=YbNk(ZG~4d8ZT9%0`!2n76YTVr zWI%ctZPG>JWopbUkT+Tk(oy%D+0qiG5^i4xt7lxc+Fa8OjjxKlIVgM>Cx_ z$)PmhLcO6WhKlBPGt})5f#yd|iuvOe6ffx`j_@wHRB0e*w6Svij@0nIzsqoPN6+3D z8ng*zBzfucyWYBNCg=oS-7I&w6dyXg&PW>VXnMqAUfF7-gMF6}AglN4@cFc8!(%`# zd|kZJ@;(vUi7j)=d}2TE7QsqN8=E!q%W@r)KD`u5-;HtE7Eh*ijSo6GjB9o%qh!q>faNSq>{EdvcKP85 zEvvG75$o3D%v||^KpX$n!K4pkVi$ahlcMJq2_L86B$J`sZdSE=%&3C7{xYaC@8G4|Ja 
zCo`bwe(gGe_*j(*>D~G$E?h6caPTH6?H=hB$c)TPe@yqL(pO5|0V4BlG4aWpn%e%DW)JG&G?m&=AiK-0bNH_afIF%=73utkLAv^+tz8?0)OhD5i;1BK2`Y{H{L)(E#@Gs@u5q;$rqjvo|Rq zin04qQ>*WNB#gv*Y$WhN!Dt8MErvL$Q!B7)!50%$mbpoXQ2SW;%g5%@D?I{Lx zQT}z+TZe>_-v-+-@TVe;$&egTUUboNWqZTA)&)*WHD*@bP}m50VuBjU{Et;?0iUjm zB8A4?uG1)D+pKGPpyo(%fc@;{2(~JXr24hx@VjX z%EX4yy=|nIco?3~g;(iN{a9(HFPBd>)Q>);>CxgWaRsn=IUkj%rgXzYC9cW548a8uV^L(zrt1sGjRx~5Kr6m(d~;`V8TV&F#z0uu!(aUWS=H7fg;T z=-pf-O(5$#x-`^ibI7U++BF+okUg4Z*fG^Cs46YMB~B*;gFcNi6;({p`6HluRqVD& zqcNw_MJLrJgU~Ncc9K+;v;r*|-4cL`7TGf5u=x$U3bd)N7mP~ua^^93zwd;TKmZd} zlVo1*LWGti(0`9~K!pe{nW9*16uRE!9EJagEDO!?B`2FGcvukib2H_k)ht=~Kc?;Ji zii}yk`*k#-?5#jT6D8t%c-i6+DkK*eBu&0i9(ndk<+EwD8&Zgaio~Iui-5?;qi+xWCGD^>gbQ?6TM)-GrZ8DMc z@qt^ic4DMR30Jx8QHoR@_1^YGH*c3%r_(Q#2v_R(uXfKB<0iLdX?P1?afPskL!dJq za$(t-cTFWd4fjIv+WgX4VCkHw_~M>SBBM!(Af)gyUXg1%W;c1S7anG?@A-}@cE5;p z#n;?Z6!&~z$DEHeQV(K7eCj~>npQXHaS{P&oLne*J~PNmnXTF-Ei=h>{Iw^s?|z`? z!cE*zG@+nmM!`?C#Wg*;l&8i&M(5LP^$OMT`4V2h9LX9h#d4unX{dLe4}yb>j5c|u zv`&pX!}t!Gl};N z;N8fF*c5RyHj5S_YqV`|vZi>iEI*^~;F>p;$tOBqM_YKITkPHnhkme6jUC%p>a_K_ zM^Z1}9Shg}&8pez7d+ITgtNUc4C^YVN>-Lgi8GTOWwq_r3LfNJ7^a?@!G@OGS#tg! zU{<3&UJ(pwsN@ywa+N@;-~EHBnQvUGMmdEhZgF}2KKb`#;waNuMvsrBH-Eh)oT&$0}bP|)WmtNoZAvjI^gCqvpW#g=D4l%hU8e^=*zXNe)zrL5J z3AOMj-~XqRRqQ7y`mCHQPGOr922V)4uWHap7NhODFH68fR-SUHwui!Jw2&w(1NgJY zF%hWp<8X8AC)bTf?=vYaiTKCMGNgLE-$-_#Nb70bsfgrZ&i7@kfDeBuPYBWHa+cq% zPA02O;}7sqySLn;LtKt1^D(GB6Fjr^0hs4rrg{ltGi+pymgH?d`QLic6T(|$+SEb2tGYOS15Adu$`sM z;R>KmJ#73g{3eki?=|}K{04d9kEd}OT6406a0_((3+R5|j`g`f{bq9-Xv;7#{aNrv z8ozIx;rgEAFYk=tr(G4yqNjXL%d8VKIhH|!yKf33a+GWEfOd&;JtJjoC%)4{Y0@5L z^F-Am)BXc+vG%BIJG{U%JoZruauTp5#ZfmTOm$vq!9t(A)^uG|6XXW(<$b}? 
zY=lM!Zx$<3D0Df<0GSq^034^L|5RGyVI63OBp&tdxX@Cq$eDh3mp2x`J#QO=LQgaISl4B^t;0{HGT?p+XxRVZ^=-{aQB;ualSgpM9z^=)RgNF|r& zW5uznB>_u7iF%5UNt~qOXlm#6rgeWj;Pjubs_-TecjN6Q}WgA4j zvg*|!dlZEmHX)1{pU@e;LpIC%R-fbgPLu@0ap#PgTfco|6#Oy({B?c}J|d=#ump@y z5UP9{_q@M4{AI&D=Y)ZAG+`{9pE9=g-sm@|HB>t3==*LNMai0A6Hof5A;)P&6vqrf z0r~B0asCW~F>aa-QS?}TAjV7vc6hAgbj_}D|KG%!(}T&USYk{HhSb^cX)awuhF@s= zih!{tQD)ROIWvh)N2l5mOsj1g2K3W$bgN#%b^T=>wf)RBiWIK)t4lL_nCi!-$x(*r z0s<`L`OMpN`$91RPcv=scxYUvnYbW0f@R^y$);o_JP|_+1Xb#K!vepM%48)v&3WPo ze5AzDP#}fSoj}6eGY~@cK8Vaj+YGNCnSGZJKZb2>s+?{UP`lRm*R%FUgM&vsyv5OK zq4jdXd*0(yI^eO2Q1Wypm&w0dfxh#GkMlO?I}38P2YXb>i|^1prPZs80B$w_gqAWy zCs^TxFy0qoQPjmlX>Z>sKLW>^KB@>J@KsfpF!DxPgBOiNjO9KgJ<6aYu*)El_+_nUR!dolB%(UarLshzlPf#OS^PnN)YPrlJdl%4jsTe3YrAr+y| zXU%{iIibh?Fq&eUmB`=8bbX&+gmJlXR1J2Fl}$1?o>!*WAx*RqEGYQD)VRfu4s6m| zF4bxBpi6Zs9+6u_`IDgG93rdSva%-wk^&4n_m(L$8$1eW^U<+i^R(zRw}%49+)YKj_~l)JJSU=oRqN{gBtetQ*MQAYupYS4pJb5yFi?@sHc>f`NY zZpm*Cn3d!E<~QAPrPEVlkdUs|gOuo@F%gc%9PW`@Y>#FJv1zv8<@#;fw;kIVKc=$* zF>A5Hf5#@F>OW&S8y%e+Y&H_Y=N)Qo)|wRo^Yas%8R3hkPEQlTNK=#3;%nsNlbb~C z*(=iYZd5i~ZJGe!R`C1Hs1|SN<@Wwbgw%iklYf_lqk;f8HV>K>>*+L^Io)UKl5ua| z`OQ2=Ho`OKY#{080riA6c#9H<{7$<3x$~x5j|#22!?l1C7Qh_;yt+mzpT`-L(qPala-^dErtGBFt72*NOtnF=ZHXgegOu&+y%QRO}|~ z0;4~T@d@x4zpP^Wov}}Si5f0mr-I(hnbkR9^f~R-I|UeX{*ANI_PF3idyqnWftGeH z{A9GN?lI;~>NxyBOc_b};A)Sg5DT!tDRDH4`aD^{J+3mWle&zdUFWgK!RdZmDEf+2 z2=S)F#4&F+vI*)!^0-U`bv$W)MbZ3gmb-vaT4YDNZB0>S6&Ybo z-`o8Ok@(+2_l!^kpy4g1ps9{|9^a6`9Edf7c$5===+ z!`fd%fPKwiQ0=4kKQHV73hEYocxLt)`U1SUr$aFEKCLA$TyOs_RvpBOd9IkJOpM?0 zmoB9b-~PD4KQ_bBWu?cC&P6?Z%R}ddQhq_Cx4n-~r`I~!`Ex((> zGSjQJpzW5}^$89D@b4z}^4?QeVj%~6iLOt;9v!RisD0L;WR07-SbXyWR3+`vNm42G z;n=WWbFta{!Tzq1(z4^hd9?)oSFE@3~_)2bGgz@w^LR%nJ>BF$jCtEBiM(dqz9ycJq2jrWKbm zMDD@v;HRZldkXhnsenQ7Hw;(PbcRzHNY3-&?Xl~(`30uE<%V1?*M;+4=qiV`0}!4( z-6M-F+fZ)4y{X*%#Q2OOHauRWDc`}C!zsxOJ0e47Rd`h@*J6*C@FBOk>G{LC|F^jO z<)Q^M`ct6)aQoe;Kb4G^R~R3|+ae>yXg#3=haj>@G6Gf!6Zl++=S;NxPUW3$=gZuw z^{xMDUX8{zZQgxuSV_xoQ?ApA{TVSlEeZAGyK?@tNS(q#U0y%)>E`(?da4RG^RwJL 
zhjIj!9K~Y%*HFI7&Ht2C7Qw-#xCVZCc*K|$um)gpu2e)HUH(1z1^X}aTEfoL_b?QCUm4C%&dA6@ z8EwWddd!rvCexL!NlWC4ey^{hnu)r_7K8O%n#d#-n!*ad-vlun+BDTN7@|*cH^Hr) zTP05%%OsjgnM!GMJk;{ro#D!aRbsSM+2R!a#YUeOFbbv9YLion(*Vn(_TyPk=wkT? z0{Jm5h|4Jbi)JF5H{ zMq5W$`rEBb$fiEF9nRSrjdYn5FeV=4x8(#P%UNN!ZaQ(@AOiA6Ob1-Xi7XgFr9-(@ zTFgQ?u1XZ?Kqz*wNpL;r?RU_1sH^42=PXGd3cxNmsWYA^Wp8y$Z2QxMPsxyAa>VKusLDy(Ye}};*CFO)Lw>!nsF?-3> zRy%r*2~?KM3;Y|8Yst;=ezHF*Y5k#q$0>n+{yl-C!GpKP1Zqi@$Jsl>s;aPw`>O`^ z9O`_#u6IEpmP=9EW6K%wey70mkB;M^f#>UTU9qoN*wp7Ie^UMrqGYTs*p3oa?6Cb< zPh~R{MN6if^!;D>z6ds)>y1mirpj7jidadI9MmvTABEG=zc!E$LqBP#a_vG{B_>h? zao%5hyZ-6b2vmSs`4h?2o9=n43^IT0x0=T(tH@h$Wps%ukXVoH+i`>K{Wy!t^Qs1e zy#<=OG>;s7Br%qLyhf6m&0fV8Hy~tJu&bE@pQE-Sp=3ioT0m7&yYtETD`Av&GS>7`Qor?h>9Tz+gh~4ZBS*Z3mkA>?p}CMSN8X*sC@-o8Q72mc+LT!7|juc_Z)HS4J+KH$SxSYUH3S&e5?C4xYR%@R& zNM2U&Qy)!E>YG@db{wm!ilz|$Meq!ocT%oYF5%h@ZO&t2sdai@<*Z*YgIi;3IrGLZ zsH={{(o73C*J}vxGop$CstjJK^Vu^4mE-iUeL+g6LSC`vG~f$;z_dIYV~*m$!A-DK zy1?G)sXd>igGok3(&g=2c%^SeaIKmu!^o~^`%Fn$7SGzQ6s}8XQ?7cdt z>clSd>1ceuYR-12*~J6lt-tg%TV<>#%PzI~;Y23RD$jg*FAJpZN%b%*Xl@e5A9i+A zgIuDdh*w~hI`=%^J+9%zJhKaA+q%U+rb%JBe*1KC2&^AkRVx10T%@5~+yP;MI0PzB zt(M70OYyaJ94FOVeN#Lm*gtx=Lj)I5Ijef?JTp_Qi-hXi_kWr+`57ut;l0x46NLSU zQTA2oD>V;yRK2)NpO#Kx?Y}H0M1k&-l|jva9mzpoXx}p?9*$92nJbcQnSKi{EdEz}@73X1cX|VU0md<%HampkHI-Q{zQtZz3%sMDY*91qs3*^x!PRE7qW{-Ru%o1BhQfy zEUGPT8!i@$O$c1td;;kWTK2*?k^1d@}za1$)3kZLHR3e{17p+uF z{Uh3k`vP@6h|Pk_OJ&)k1g%<|^wvL}?)-X_>Zh*f4$cz#n&l0zKNp>AV0hs7GO&oW zcHca@-KY!j)>eMmo(CoZkz#a5Nu2_FtxmNg9S$zrPv(z%7^^Lm3MEQa?8?a$rP^_f z_K${egx4d}Yy`Q}HN3i1tBVXxwH93sik8GfNScz8CB_^)A-jy0@YCUF3V1oG5a|$| zN^$|(l@ILcH*cF$ZQf`yi$F(wzWo+g0PG)>$M#zfh3y$eC!9JlrUM3X5l)q3kA?Gy zOwt5IxVh|wkx<`}Ou$7IF52eYU3ujKEx}etivYDoCI{UtC2#^xCxeSL$$%jOBo_{8 zoGlq*Up=Dk{WG!6W=y!}@k*oJGEAiWC3))(z`QUrN22=q^t6D5soMFf04lmbEhtmq z5|re-;(DKsPLLCuywTM;FirV|*$T>@bd5%wXwK^b3-S)NamHy+;s7YM*ui4-%k%R1 zIGiwaDLPeFBUNUVok?%SzBYwT!TYpEmn7RsZQ}ijGYJ&d33uhBzipgrT(ej1FPfU* 
z^1AL~N@_Oe90Q;G%j2LCJYwKJwM#4XO4zz}Gn>CGbpB=nDiKhXg1-c#k~>-7b2;Gf9lfHM(Tt)aSd!t54Gi@NU8 zFSYaDXIGJC*AqqJ05Qqrd&|wsT3zN7<05CV)qtAI@|Au_&cDkdetquvgkbS z5;&^!p+-xn9fE?saHbr37$f#C7#;idfOUu>D20Syh5okf@6ZIFUHoQr{@`=xHqg|p z$R^l^GpVK(g%mdE$O2>G_Ul(Ydn9!~r?dY)=A#-)8?dk$jEQ1uBk;J=l#~ofFxL<2 zMoO5D%i&^!jJ1rF5L1(M85~(SRWBQyMTiO1Y`}T`T(2|k-0qn3Rr6RI5dH*z^9Hl#^3cNeP9t!j|TW#Xi{cP9^7KTQkq= zxR=X6Gn{wT<+kh19q(Hy?wJ9FL<%Q!_$)RS372SxdVZqNks1yH*8no02OTrh-Knn+4gZ>!b-7m^|iq{%_>m0K9Yv7g-ozX4BGjOud{YsPOLGu%_)~-SAgKop3{bd`dA&E98qSt6}$B7cdsQYe>8Fh|x z=4bA35+()PVk^-HqTvIK=mjvOo2`oa1GB2qAu}i|wUm*HFooxDGcH5nF&;LKXc!Xj z^Uwrr7u>S)Me@t4r9~gRhtx85pMG|RA_P|OsM>V%h5*CIy4#QhzE|{wpoX{HPJKS9++iE;lYt#$z|B#7tXuOoBEE0TC!5exC4}C!#)g%(=ooEp zb7K_qQd#U{+BZdHa;nW}5Bw6ERmrXfn>j8UYm+svf0G>Bu7W&Jw0(txEV;S#2r(?r>*xe`U>+i#31Z*vba@nnd@Aj60`O!Q$ph|<4S zCt(X<$&mHnfEwVd^lLJ+ccE)FqPUSP3+q57&ZK-?ejzOm7x$8Sf7Duba38?PSR$Y| zzf`&m;o5YZ*=1{Qp}*){zl;0_n^Pcc$Bpv}*gy-M^3PX^2m4S7L|O}_-{T1}@jI(t7rWXQ2&D)I<4g92pfh%z8}eCA*n8CA7cC?LIAp=+?NrvNWNoTE!PeffR zwuvvlNV#Lqd>`a^vjf=VWUMI9;5enR_8~WxiXMRa>j~GAU^b7OVOeCX)?2bvLTzXF z--LoCc>{ZURBnN~DmE8@i2stoj8nuJrsbwbo9J``W7~WqYO_Saob#RQa|^Mj5<8~L z68yMd{h9cB(FN@%B*S%XG-W>MAa6-{L;5t&bNZ9_dFooLA;wFN!DvQ1?POSGAXe!T z?%v-4(iI(p&8ubCd4p1+-iu=9F00#Og+k(KT`ycZbl_GLF;N4V4}+cz z$%cG+I+O5F+oqjmIFlvx(*;YP;XtDsmW zkn?fnhkVCpyp4{Wl?*_!kf>jnRG{3el7VCx*hWVhC2<^Qu=r!)w>s)IS{o{1(`%C7 zic|D*CSj94q_&ECl5;BNik^m(+$UA7yx&XeW6yS z*+09h8RAyoMJy`ua)bX^QVb`SU@-A-B5tW zIqyZjV)jFIl>NGZi zl|dc3ZrAI8K-RyuUPZ5Lae`1ogg&(a2q>22Y>W_o`ZpU;D3Kqq&ocdL!rQiBz1Z`& zFzMsSOGL`H<$~&8uP095Ph}Iy{~t}?99{SG{GA)yb{ad4otvbwlcup6+qTo#W@FoS zW81bG`?>vmpWoU4?zw08y)(0;oq27wFST5HQ;M7xmDIzkw)qyTn2Fqr{0z5h<`&k; zN@G2|&xW8B0LnDo0p2nh(!lK-mrQKLRhQdIjqpBJ_|q*gp#on> z`eqH3$x|(cTo!T1$!f^QCNmV@tBGno;?v`3Q*t(iH{V^-8iv|X+Z2TG8F8{)<0;sI z!r=WsgbYD_(6*Oq0b69RUzhp(>XF{mGveqCN$*rtR0;u$?9;o387>;qa4xlG z!+LyRZ(S99q{!;MzQFVPUnp2G%h^G~Q6z)?PNSS-1Xn)4HBxr9?b_IU91FEwcHB=t z#hSw#|6Z7qA9Pk9{Z<;Jfxm>w{F}1yz&>u{pdd8-1vQfU?PxVZA)ZT!u$pCJBVqQ7 
zb3;HhWIFZaiNmPQk#3`PpCnR1*D)pQy~iY4eK$!-5JHd4`p5;hl0= z(y1#|>l)44l`WA?z%2=uFyCac3WdbbLm>DGteR_{Di}feM?mJKh#QDafPF zXwv_9G7C_2w;w2-L_<}nba`kAzHBo*?~yQlCjira&U?Yuxel!+xkt#ti3XUFcXwZM z!hQ%zi^3jqTCa_vEPv%2|bRo8kF2zz%@qE&IAH`|D{Gh>_=Q`0dGW%?Urk z=w|(zK@nCSdCILgtI?{ z6}(*jRe~z02ItR|Z-7$5;nQHf1{%+EBpD`8&4O-eKxChlE6xL5c`dpx2fpO^T8{6E zad^J4Kz`So#dvcamP(mLMKB{g4jYF#^%VbOo$SkrL|TRp>4r-Fd{Jvzv@q_&*E9Ay z7tusiiUFam=Q8Sv8@SZq0vqD-y z%b%`nH^daov1A$kL)?Q^DMQ%p76%=TG6zQ*G6%2BKP2}s^A81fp7sjF4E_A47;+{V z^AiEz!vU0we(JEvO!&%vfeFXh%$f8;Q`yXC+X@yY6*KQ2eU5`0QNN~%GANS4iCC^9 zZa-f z*I{)m(sn&kna$HlhkwsD-TH!~=alX8bMRNvEpw|nbk4NY;bcZcaJ7`f@IB7R{7MET zH2=V&w|)o=3h~i+Wo#zZ&P%vnmR$#;G>%5d7LYdjrm+2PQi6gnW=`Cmn-o>%hIE`~ zVIac7duoSz#5p;93E?n2o~CE4>HhCx1~aW0MFmkQTr!sQ#aCM_U=0goLwPtRB9)GGa9i|ZU8W##l{$)3I%LOib(Ms+lK^B^js z*J}6pwQMH%a=Qj26GSxT4bR892KP1iLhH3f*2~Uf15h{`gnw<1!hI-DU!&P(?qGT% zq@U{~Wm{!hDd9NCjw5E^uN)-rlt0(|{Lmc)VB)#ND~ql4IB|Q|7Q25 zimy#?F)UcBzs2oj9g5sJwcEJan4^isW@X5E+HMtyzL#B_?!rz8PAY2ibPpF_{}WF| z=3%y#yX8PY1Hv~@vL5<3-~8LEK!Beyago7rDiU)M%3-opJt8Py#NK*5Z)89 zalVA>-d?5>?Vfj)bRiaMK{B~~}*|Rv2ygKr{ z-CZpcntVS!)8EI=#2OKmB7{Oz#xgi)0`)4Uwn;kXNT$E+XovF+S}ZN;@VETNfV;CxlsMO`U!xqUW_ia46>DQ9Zd$95&L3 zVXFI(WS4G4)8Bc!{`^#rFh9?q1O0WiVP4Sa@V<4@hLJ;~D4Ro+jd8CzKFE24vkm4H zR^SwUlKm-v2l`P)#m{T<4uo7Pc%rGn1t+Av-je-d=H2q&rQe8uD()@slG%2>GrOPN z8pk8n&)nA>hFq?WF{2UJm@ihRI+z7@5^=*Q(*Ey0NZ$z=-&=m92y600r%~0Ob+ys- zZ1>G@pQiE054Z$li)kC?LSa#Gw{g2u0J|IQ*7mynfq8g&mM7TAp6_Q&{==t!QsmIg z9as`-hliWAa~koRXoMgCQ^=ir6}b8QOJ>fF#m9KWEcM)U8L9aV6GTsQC!ZNxY;4h1 z6SsoUn`vQZpryO*`kPnbKrt2UV{`;+!R!qd-&JIbihZN`hDgr7Z6JtEP4)ubdL3w? 
zJ=6Y63^~M9{qq!SjDr1MA9R%%vyYQwkCv;66ubLrzPs$!X4j1uF5&fV-f6;cMYx!n zd{rc5ESR?P<+QFtzAtq}Q{(5qr^;>GwrIbx^JrhdShEL7G;KfS#^@4Nv23)Z1)7Bj zzLeE(9Pr$J6+fx@nox8Jtap_}VSMBWGcPeOHnE*`cru@j4xVSt*iCPTd!$kOoj00C zDH=%P>YIH#-$X zHxCUgStGJ~ss(0Sx(NzGmT%|J3P_Qt>TA&)-zl%LH>A9ZH_pbP@oOf;)Uz|w_q$8Brmb)&< zVU`K%CmnJp9f860kmFNAEk!OW=vg7P=U`6uKDq|7cV&&s-n75gEEH4KEEMwo;N3Tw zdnw09wl0-aeI8YWNNHj{S>4e!@v!;6^gh4cPGvE76CLd*rJ5AC#Yf@ql5OM2Wo&yr zkAzNg3CX(rT^HY2(cNaq;F!TN72v51B*XjHZC35ddtyV&Rz zA^ckyMDHrz`54cctkdQ)cjURnzOyIZ|xZ{PvzW4$b=vAmrjzV(DH%UQfxSXj*aH7+U>dd6P< z_HZI`achHp9M;h3X9IKoz9M2r%u1Q#{rq^5GNab*QN5d*ya zthnP)5)?uvS@&s)`Kl}X)YK5O0fl0lcqCcDBgIoOFjUTI=03Dt5qA0<#3$-5>Ws9AV_SN$D*v++EWz*y+5H?Or*Rw@s_ZTC=}kZE-4zS;r)jz4{XWYXh6 z4#!AxCKW|c%y)!5aH?J~A!&U^L1|i|_ft93b@P2dqjtzCHC0AV0@7IC+`fy=zK{Dz zVJRI0QY9WEtDp&b8o^8a|F8fgWRHX=cQY?Et90Ir0=27ut#n=Rfw~a3Ty-`6^p60d zZQh?J_gr;Kk!066t?%4NCSrIp8l4Tw2dE542z$Zg@qSdwY5z)Sb}H+QfUf7jC*w5B z@exuWc(;JhE{mG1(URLt&2quEnq>m?8K~(%sifVGZ*ZCON(uFjFf%y+VA9)NcRDzr zij1=S<9uY6gK~DD39p`wHw1sOc9rP&=&9dJ5dVT&9)DiRt`c6xmsiORAHY zGb1HmAF~vp6i2L-KN`u0Y%SKdNRcMdPbtKm}vE`I!WK|Zn&{+ z{*h#g9Gg`u5RP+)~p;!%KA=ssQq&Pbi4MvUxsD=!8u!VaGyrmBf|l$)=4* zR$^74HwSb-+*ellkt%+glctmF)KW2z8LsEuO8_NcsmR79*iUca^+u4j1VQejBC%vY zaOLv5!7vX_>Bj|FnoZEsiq6xsv_94#J!U>XqSOh>9MmkXbr^ zo6I)YkD8C(a}HCIydXmm{n$-hEfNg^aG5tusNpizPyHrQPFS0a&5iQDl4-fs z>}oqGa2Ka`2ZmvtV^2CR6SGygW18vPSg&7?mSa^?-r!8mGt=v_hJek5%7@#Uam9?j9``3#Z&A`YhXh9?BdC} z3r$y6=Z_swZ_GvE=7|(Qvez1thjIei^r=(LEUli`nA=)8K6AWEypiA7ycSJ_A_C4Y z14X_%cGRM*Chu43!+m>tmzshK7j?FGQay1o8k#s%Ix%Ko5CcM=y3OCA8!T=Gly;rD z2fnHtK@xW#iY_XZ+l8(#kEHlH^(JL_8m7Mu^cvxA>Tz{HY4cK1Z zrJ-B84iC_vh3(u0){nSf1f!+9ZV3*b+#cpJpI`?6RDtaHn!sAXMea05M(e!k^C2jg zkYpb8{1j}`HdySaD5*B__~pp=(?eq?`fN=tXX))(*p#4)xmgP=sU1B$!=6{?o9#2z zQMBdM47>L${>!G0{JFBYQO=<)FAzN@ z+R#kGRqdmKNhkAxXxxctkGbuQ*YqP?`yj`Kb$NwrLx@Y|wS*BACF^NrHn<_3hc2eb;yzRGL zcmjO7rq$>6SVPW$;X50Z-Y(|5T6m>Q7WoUF1w>@R%^h(U9;?w&oHkRmtEM7vnaVzk6LPyDDS zIc(FW;c(xx9>iqZMZr>LJ3OEvglR2u+&AEQBOgAdd7`b+a4<&cP3@U7F9APV84g*@ 
z-3g$-bc8QO1Xj4vbbJf3)6a)7A6w#C(&43ueF)O~+|<~S&sVw5pbe8Y-HE&b;KqFL ztYoOVozW&ygtC7Xy$ZmQK-ts1tVpwM5ZBcdES}J+G&bE(%5W7_?$Aic z68gMp%QBOclXGr!y}G;;k?5qZ8XuR<+#XM=5k^w~6fWi^d9#hI4}ND?4*X4j=A?tt z4S8mf0ii8onRNinIs}Vac+yWTg17X`aWiW9CNV5!Fk&KgwXC3=fYAWeyJKJ`?V?NB z{Tg5p++0N?j=$8WbG7$6wikn#@P}rZIwmaHS`15B6V~uhU2MVyUZ_ha*YS~oT)O!I z@UwS!(Ba*>e_wcdl?r)B+b61nB{k`XwLYWuK>ptYprNDw;knps_Zg5$htr4!rGDH8 z$2}O@K8bBIa(MLk&mWW7=Hl8cJuD?X|Dq{92A$dHeoh9>Yf+6PCtKr&ufNLu5Rb4z zGU}MM+wRWkV~|Nguy*fJjgY~mFQ?_u$9JzfXH6Xdg1ZaO>v6NiTfbP(KXztGkwZ60 zt9O61p1hYQP;L(5A@|Fq12GJT!Jbf&pZ&=7jCW?O+>g833zQNdX;*Y*DA{#)duEH7uIO(_c~a;cCn?oYJ^$A|u~FsJG3SjInA6TkM(=-{@5E`~K!Ndy+QU zcOZ6u>i)7=mUInHR11-*ry6R47}>>lPSN;QB|jn-@+RJ@Fp#(uMe$R!4uWW-?J+s@ z$`gpsZiASCP{`d`gW|%PJ9$6VW9fk%SfS$cMB#g{?Ed-=!faUo;jmO0*K3+hTUqNL zY)XD&8eqGmr%HQkm)v&^6DqBpPxWLYQ(*eZR)Epew^PsBL@7yLzq0c>YwhqPu51@O zmC7s_|7-40I)6ycn0^vky-q*bY0AQqQzU4*)GaJx?5=eKyj{KRfmb(sY&>3Q%c1=3 zY0gNa-lTFoTR=-ZidggyesmL#Jl_SoIdKkYpgg})o67}L6v5)04t&FM17sD#jgk}Cbgi2=JqrCHKe5kNYywgU*N#iEo=Oj!PED&!DoqT79% zY!x_dT(!Ygw;T-${2Hg6R$&2WQErw#?Ljj}14l(_ z>8O*g^>+1t(ynuG8U))-(w-kl4)ImFWuHWTLAKr8oy;S|p^Ac)i(12qw#3Gz1K7j@ zfdRNIx7*FdbKt*WDd1Eo=VU>>!{*aH^zG#H2;Q7W8m=ub59v0ig z-~@dv(y$fSZyiUXdeYxhKy$x6OK0ti_yU((cQB*|Y&6`FX{HaO-5y~&lZvs88>}AH zn7r5hzVC+kML!mPrw@)@Rd=b1f7mgf*AqqTi~eMW3!0qaIb_Jjs7lpxgn!Xz#c0ZGC9u)F@g2Mr%O94q4wV0%JX`@65r zeogNXojig1BY|d1gM`p{&Z#c?G_G1Rfo&A7BvMJJff=>(E*)vNfGJ4ukpVpV&HP6Q zO9izMGT5P?>SM*8+}xAW7HL%RL9i61odmrUg^U!!VEOmm!#Yq^f!h*kk-oLZQw8<2Fdvpd^wJ#n)ldV!2UkFm`&h)mE6E*w+%rPTXj;l91<_jbEdRV6y2Ps= zwe`ToWxB-2IC-D?H$|0ZFqYDQJJvtW7O(_&*VjboNH@RnQ7(Irj(+Ec0Cds$bs+yx zBQ`;*fB$1W|J5houMoNbPyDXKg*3%%NV)L?;@Ng~vP7{su;lM$4#s5QXz*>J1R3&&5LR?b1b=QI zde%b6kE4xmCnFl#~!GvG%~t5%&ESOoRkHd4wXZ?9PgVz9mJc z{1RK9oYZ@o7f0>Yu!9*pj-#K=%O@Eu!S*CfS}Y5{$e4ik`6kKhd|6Q9<9SmU?^o74 zjoae~K^&v-X45}CvXdvX&wLiqH?RJ~Nr=vy6#SEe@L6>8TfDdzfK=YSynz+1ng|08 zf@j$7c7G&xzZSn}w>hLO7}~NpVn@T^B+16BC=Tq zm~vy(LQxS!d_|g++Mi^0h_cLD1Kg&OjXgNwVbmt(!x9#W5(AN<^5xG=r`ypTA 
zYE9q{1n`8vk%ftNL>AtXrp{`gXNQ2ZCLUUK+Bv|X!bStTBC#@gSZ+eAPZF$GWC~Ps z5cY#U0viwxf*b>KzQW`65WbkW^`Pp4J@8WH8JR^&8DGLGhH^E3JdZLnY^zIEA-r^; zUP-yloopmQ$-z5AN$B(XEhh7G$Y0wa$R^(t^S0mWk_*Buwxfd z*G%EnNx@nFXlNDo3pq&;Lb#n{sU!A3C8HoDu0ml>hgay!nC8~w5s9jNlT;nR2+7xF zqdOoF<0M*B8wx7jeys7}#xmmK>QmN<&YW@Yc{aHo9Nn&;`th`gXU=LcU(QNWjEfHa zyAM%NikbHd$Mc9JRUnNBd*i8eUi)^mk~nI9B>eD`0FPWA*!b`sV&!PYex79HR)UPI zk-8jlZZ-!m4QJzzb(Vh5yunP0Oo|!OlYu4ro^Yn=)XgHHpy5k)$ChV8xTC zu--I@HzO6v#7;}l7nNJDJl$yEqqqcB_+j_-zmYbgT^=UgTei`jmz5N{@<%BeDDQr~ z4#AF|=0LO`LJQ}1!`rd#vtR1az&A9e!j;#pyVLa%R;T(JU@q~MSkihLT5n^37%9CJ zj_uDj7vX{$^XlNw(I0de)E^WipDog}=nndse+ltepNuY6c~W0s`&Th`S)}ZbXx861 zQS;tP;$Zq<2|Unj)X1nZU1!aX?>3*l?-#M&FSJ9+3^=eAd)pVR_1vJ1)_M=tfNn!l zvv~BOJ}3EW$(-XL64k2|_ti4shdRYQ-_ki_(xoSX)qUofU+a* zwZx|$`pPr(w9Y3M+Nw<6^VBD@?gFppg)hx&;lULaR+0 zpWTCC_}J@i9&nPV%(bjI@#ePzv4Foj;-P-tYF+>KejgW~noe8pAhwC?(_YX3+b=a- zK~uB{uafhZR^3gsG^O%!opBIy8vP0kjnpEiFL1@}B!G;Mbos#2tW5Ef$=_PP(oSEk zQI|f95q->m^#Dvr)tau+{uV#%eS|zVnV6R|v*m6WSPh4?&VxL!`ifa$hb+#I`9YY+ zBCTe=C(sv#Gu~@~`}^HMy%1%fZC6kA$j?wjP4uE)6AbI^PtK`NwG#DdCvS>kKOWN# z#81S;jDm+)H9b$MWCq<5nru*|dsAZSPkdvBc3fGjZ>au=bo#8luw?$?*Or+8(qq9^ z(iTu6#V;#2ONUe@>I?tL-XL-U&Kc+;GjtP@^$dosb$rbhRdGx?H$D5MGi$ljjjXB=G z_){|^Q?TclsRm!(xPwpCu#STB3x~Q~D%{T&g7&$3am^^Cg7t{uezQ@u@gzZ&!%k9I z@dOhbVKWOZ8dy{B#!3HaxH?S<8#GW`z5wAh_Rrc%hlM8wZQ%O^qp9v*XL+kUr|GJy zqH5z5)Pa53>a;nigCT>zz}1}lGXTLLtne!~-eSsb87%F3ofP^{tz+%(QjC}%$-QQT z$n36xOPi0O5EVXE;g2cbCAH_=+&?>gWhgyTI$w|ypTt?_gA|7UJm?c$f!a-@4!}U? 
zp!*#e)cMc{2=}_Y-A!iTwGoC7ms8Ldx%Si=t>KkdyM^FnAv*OZlQpd{ueCwAvHKLt z0_bbQmJFS*MU~pcL6-#WrE{iiX*=Nc{rq1_3bJ$OWO99}IP4yI!TPs|3i=*CoZ?{KzH8Rob9&Od1=SRDjH|)-cyWQSxYS7E zfV<-$KoEp0TAxYAWj^cO>nKSoTtR9X9 zHkSOJpbyqt=&x@X7fQVV`lpz1i-ursT*BNuH0QPwI_Zm!VHphboOHxJ!R&3A$i7~9 z9c>K4<3)LSyi_Q1I;zU(D*tc6we(M=^#TXuvQqM4$wRd>bg^kEDH|Wj!DT-k4T{pm zZRb(Z+87l1g7P79^S!Hc(Ek)b8~ZDs@yHVTp^;_B_0ST)zraqOM71K~|*|2(mCmD><$D&K(lafs!Koi3dCYHC7M zRW*2yTx0ETWD>pOSn(MafIZ*~I`%zxf(&Y4Xcl6}e86p;F&;S0$;@~MVh4^mWNwG# zE59ru?2m-Xg#PbP65dTeu1W4ytq|Nu2^9h13k%%e2+JJJz{kt#@X2qf^TUmnyiUl5vJ?}mb!QHhGYlJY(&V7@Y;wBW@J;u zLZOBLPQe^$XB5ZdB;UmkhvZT+th$ZDyZvGc(1ztQt`2+*E&g>9yURHgF=@ZCUbm(!&UF6w@^F_IdCif)P?XMx~7jS!R0R3y34W^>Zcj~RS_TQI> z#zJH(9wp@aL|;clL@ZoIz3lDo+NmQ&5owu%_E01Aua?cX#biz$Ssj@W6WI<^0ickl z@cD4DX8b3XY_`Tgv{bD+d;joobV<*baUz|=af0y5ov25cZ1zhdA$Q+DPXSHWKiO-{ zKmQj*HvE>#<1LTzXkvb8SQ7(i{VWI$wP`{Z2L{0Z2+8M|D;9wrpP1O-7=%;{`rfC( zf2hHF)284?5dHoQ?!|xmsr`Uj{;LJr(0{yC zA*?AkiY`4Bbb%)*BSyYYfXK7hSBySq;>R3%`Jb>xJN*;Yf~9RGSs^IS;M&La+7iS^ zrYl@E6sqrGAXw_BcXtP+mFNby7Cd=;m&h$0!B_a^_B+mX5Dh_8^!>N$hr?tMpBZ7U zBBHYw3RYs?)#j#6-|e{z{7-yuS{jK~EsgaQxYPHbe!9h3$kG(FtK982!xj9NY zI_a%nWBsB+H(zt6J8+3lMbxq}ijQ0h*7|$xTqyYQJM6mt6H|R2JC=WI_3l>71R{jA!9`&U^6KCcN>kMKOg7EfZX3k}i-zq*EFWMWUtKd*2M=SO%xAudm~~ zfdDp{5~$?L(3;9qm~`4Cx4OJ3$;gD4-NSzUidzJ=_!Y}%71-|$0jz~Vf1AJBxgH&) zCdxU-tyu>Zdi=*e*?+8IX|=S!!hxZAqk-i4doSKiuU3_KGFLn{@C)N0Nlh1STMmRl z?0mKewYk<{PEMHT$UdpW&4=i9B$~Z4SYA`ZqL1y;7X%0J^8Bm# zM`f?a(zOcoeo6Y3`%xD4IYn$yG!dV)vT}TK9Dd}q1A5F7-nAX^SQa2T%h@aobo781 z|1{m|Ug4mVABX|j#Sc_%!3a2`k6XUX&Fc;_V0;qu1e{jWuJG>}wp|(?_otLZyiST_ zTXn`VSY?>F19)`YVHpOPG+uvyZ{NQm9;N$*hr_Qlm?!M+%mbJeR28gyI^1uK?oSuW zykSwB5T-JD9E0xopSA=2)#Xu~E>=OU8On*9D)KyH2Fn@->NK6YUla&{FK+IW^bwDf z#W+LMIy4>7^EU)1_hzQvO7(lrU{Y;3YQqr*BlUzO^@OnfF*GgcrIYdIaD19P+ulB# zbC%mU%(@I$9ko7GFL?rid z;S?c~xX)Zdan^x;OvzY}u*YqZkgbyD|4tlZiyYIbYE!Z(whKlSeUlXv||b|4#t&tE6Kpz?CGG zj%`=!#KfZsX$fy;?TNikW((==NZhsot=j#RvT4fs z33jtm)U>u959g&s;4a`LNGmgO;!_u8gu2=~p7A!b1k%1GId|d`5=$I)i~^rio6cZA 
zRE;ifr9J#9fatuEO2Uee-;|sd8qZEwbzGM=$5vgIbX_jm&Z@thYW~n!TW$ZTrl_P= zOG@8Yg&{O@>Q z&+7|8=&r0b>)c7v=0LMC^r_rsQ%>tI*^HeP;(-Uv2Ti84Q&18B3IC{KKZuX0;w+gJ zM5y!J3BXza+Y~>8y5QZksKd)iv86or-IUO$7thTA#)5v#D%eo7`-+p7NuamM71t3@ z7t>ob6pa>fHr}vc^U_0W6b{NCub~4Ue%$j!VR!ddeAHPVzc~`^AzYM~#5;2pyOY8c z3*L{nL$2<){1`#ay!+6LxlIW6Sm2pEVQE&NqWf~g|wDYmgbhZ?VrCU#2?n<>^- zTON2P3J@W$BrDVOop*YyVJxwu0(l@{ZgYG;G%h=C<#o~}BHnDfp2aEOQ}lc^ka>?u zldkc$ikf>0@V18ok~?juZri%7b2tRn95}pft}Z;a6gF>(u0~+(cu4z~YYnmXTig6T zpbp2=$U)N7QsprXJ43<>Tix+DeAC{;=qSrISrkR!I}!Sfp@CP}(hv3AgV(2{o15l& zXMSt-kfMe)jNEVLNtSCoOB1c{AQ4b+ zLC)z7K)Kxkc5@&nb1nvw3zCoPP@zAdIZNn?{;gajn*CUus2z6ZwP7 z?_R-HrusS8jCh7PPE#WjkvQALEJ9m z!?-W5EoR+Q18-%4`z!eOXya4ddx*4*j7d2@FH&iK?N?oVi}Wqv{@HK}pxZWYam_2x zPae|%^bIt&*v+O&x5H)~J4`#wV>iremkzpCd`fUASlI{C<2jG_wzDdgX%ZfX0_l86 zCh3wp3yJ(@ASr5s2;(*C{<-s)c}tnZURJPz;6|eDv`8>K%GuBcD!;4RVd6gH7^2>x}WG|3o`l0ziG^+QGT;h2zWh zfJ`wqH>~1!i8=j$Q(-WfXRsQFYdcPGoJPgy4>lk+tT*=-9i{m-H^R*N`#03x)yVo4 zl1xEb3QkQ-EFAdU0qoZ8hf4<(CgAZH+m-$J&}XZ`P(mV@+9dZJ==|KTGmID(7ocwr zrU$H_49^Vf8y=IPAFQ_8>+}8cfoC+^e%;P@4z*lB0DaopvwHxHa5j^(d7M1C;O6IT z&gb`OYQL{lP&GvwT7H%YILCuR^$iXBBWxV>{F1(xQb18@`C4t472~hU$b{UA zYr)ozeQ?w?b2VE&uY*=hxN+9T-Gg4%o@=JHZlWTKyyH)|ll#dGqo5?R}3Qk$91iP+H z3uFZ2U|~ltXiy*lU-$LPD=Cr8_Bf?&gxT%_U{J}%9*m`wNBnFmH6CJMY36`s(;HK) z;qPc3N4&ygfY85Oul`2>OIi>C8ZjMje1!oBPlN^T6Kn!AGk%fr6c9(L-5)QOJiB4! 
zKlSIGfS+PRm%UUfJpmNv6E0-+VpM+qa}4d5ZRMB0fEj+pIX;@gn?8wMV3RyYgc~-g zoB(ZcNj|WEZetp&4^BC)%rvevOz5$O>s23mAL+2E|K!k&2#WB!Ia-(LftN-fkORR6 z2+93C)BDu)KGl+nH=QJM1UAzd2E-DLFU3`Q&O>ABuMNnYbQFn)_+J3^4`u}~u_d*@ zq#y4J?fQ|pN~*zz^6j@FQ20sLxwhkFm}VXdYpHH`W)_u&y3HZRWEdd>8JlkxY)khh zqrnLz05ER^63vyoSiP&@oKu8PJV1=QRstg7oY^lrSFF=3h*y;sThcFy>#(v+RzC1G z-si&UxB7h7co1u`)+4ld9-n)heL`>7f#99@Fg?bBd&4nH4~0Zvc>FIv^@(L=#J_au zEuTdhAgXf~QLD^W>j^RDC*8H4#34k3gVtC0x4uGqC9mPBLddoD&8*?A-0g-u)W5eVBG3oqNcUMZaM`Ok|O9b0qYs@u;Bx>p621FtTm$slDpJ z&dzT3s(IZU8MJWOfcf_9t%aP8mtNF34!_s7O|wli?EzBMwgMB{@QOX)P%vJ)jvpzx z3ZK_&?fZUTMwZLF z3C=aw)U?64Of6Wb4MzkOp8}5jl+b+9q&@<-%@2#{)FF!E)&070b>}e@i3k7TRB1u! zjePj9{4>~v#D)WE7Z0py>tW5d+9qjb$Oy8{>EyTjEE&dCC=x+YIao4-M&06@&}%rv z-H(JVN;}MDD@?nd?iI;3dp0Uk)H|M|Q6Ak%7)Rgd%uK?{JIm@$D(}~asEHFkDIuO$ z5wCV7k7hd8$@|ayYTu;hEgvnLwi7Cy%3gej41+50TmpS2xP5K@V%KvoZwKYxq*bNd zJ*6Ii@37?@XC)epC%7LJVAHWHn@)%6qU~mgKy@_#btyg`T@_+3HYUNWW96APOZA$mHzexz>-HGCX;R766w<;-4-d~ zEiv5i-0&4*Aii71^>45jf&ut^VhCx-hLoS_@K{Lba>pWHO?p z&4C=%tutedOSYR!>!FBvp zZ%Srn1(Y`UuE$N!!iox6Jjnpxx$OopIojPzBB<4DkT9)f7ZEW@AfFS)mhK+;T&;_z zKx;$3pVcfdxjGx>21!8so8mMP!`IkelkS-o`f9%AI4^4aJt@wi3v97ipv!{ZB?c5{ zxuko*7CeNJ5uX#ywcOtksLe6{JMvo54^p?r<;jHwLp)wD8_lonIS!+=nkdMdklw_L zsm$hd$M9`ZAd1^i`fk=w)sRe2Fk1_KJT!Ngl7Pu0+2>Q)fPEtYLj-lO_p+jTVNW4m zZ~kZ3n({#uZ^}0g9UlSbrGFg2P%tBE&5W?vsP1}^es9r|>J~-sQ7se03`?13gy;#% zd;vv(F-*@C+bS{xirU%?p@a@$|H34XEzCD}TRi_FS-%CEL0=U%d!c^{gYh?Le(bmr zF^}`w;r%`Y=I@Oc<7sRQDkMbBigk_usUY{Iy+Z9Tx| zxT~jAqyJ@1lxW{*6%F1bv|OJw?S6=f)|(|G%-nv?djB$fkq)Ay1<&6<bn97*j3kS z6>D+?4)YJGoHcG&*eAktteAi_PYNT3sdSTeK9Ps!};-ZH_bwoY4Eop zvF2Ze3{E*+pJ!v!@uRj8DiuF{>sQ>Pr2m4k!6!p7!l>d9f^tCMF2@FXFX#1BM(4}5|7_3BWVDYt~o`C$u!A8lJ;y8zJKaIv!=3p6T1dI$Il3;SCjE#o*k zI;xU858;`bDOr6+nlf-UMlyBg10{a@W$b|>hn4WQW#L+LbZKdATkq$Cl=8v**+QA% z+Zs@^C;Vb`i9M>_6>}LE9yQm!W~x>Q^hN${Yg>0Ga|>>XYcI-$-5&QAQ4&Lmt0k+e znt4P$b;e`tjKz62o%cT$utAudOW9mSR!mW#1&jPt?p^m6H-H@BuA9*!$5oKU7{|At`NTXrM(v)bu^X~qGy3^M{|=PiUJW&W%n2e$B#__lJj3n82} 
z1%i>L0wr0zeGvdHF}X`?(^~?j6#x-bJR2U2kLKLPF`1S-HY5ceyPa zgwoFO_k)Gsi+We8}=<}PjDD1Vn}aRu)9o7P|_k#Ovt;G#Ou2ECG+-!AJ@%AdEv*Z z!D{I$cE{toqr#4G28Q$_=kjBdLR~fM@`u$P=<%2T5!282z2W5AmPY5R*Zciao6;z# zhOpqVINPI)SD{w<`&;fQg)e2?>q;bMZt^c>q0VotcjqfAQ`mQ}4;SJK0F!kvn~pB1!azb1rcDQ?$1eo)nZ zS*G`%jbIgZQ!Kk?tIZi9N07`|ezcQdnW#yzv8>dmF6t z=+zCR=)bS3ele%f=ep$q#r%!qcNbHFQYjsGbW$l@nQP6QH0R|_3e<9xI>JOyO{h@o zydR9;o*17s5Kh0R{EH!A=AaO;|7h8E7WR;IJ55RJ5vXhyLD|^@;Q! zh%xpWvX4;_*M=Xp&NXM3Mu{V=u1KC^%_7PI*p>OL$?#sML45gyT~1r6J;{q%%M<*U zAhfgzmM*51`&3+%Ku^Ho0Ea%ui$M%d>N>u+J)JKVdG#MA_O$X|X*r4puPg%o`0-=h zUZEVL9Wol+9&>w0@t<=6FJuJy2h7fD&&@nIrRA9DbQa(nYBA}o7DAMkYwMp?HY>y2 z`90^NZj6cUStZJ1tC}yRF$e#e%Uv@hKv^IytbUT09y`rO?OEbmNFdA}?+Al9vFi*X z)UqBoqu5dh;<@X2`FhdPeZEQ9k#4nPN1l^{@ZbY&{7G4PRXpfnxR^ExxGOC-Y*wE- zD44gA#h(O9x-LJ7wYwzDy#~V2ltCVaHq~I(Qwu}5FRa&Ad`|~1gQ;v1eKFGcVkcoL z0s?~ZLYb;^R}mqaoOU%%ZZ#Z7D?tkq-nuOtCfEac&6S`3pioYXxUb1om`QxwlGazK zKkN?IJBYE-0yDj#k6sePitM-tHZkjx?HbKH4!pu?*Wg*-2{@wOgSweQ1Q?VSy}a^ti+ov@~Hd1fOo z_zc_^dQdX*>AcZigTo&7p&WA70)*U_mQe|N8PBaOwx#*r!$5>t!y;y93Lg{#VqXvZn<8Ms=zaWEW< zw>tq6;_z}qO`kM|fYP#*x&&C z+Rd}_^*9FY@hk}OB&Yo9S;v(dzeRBS1^DI8Kk__La%p!GO#yxtf)%zUFl;IZACFx> zH(I4X1StZ@SJqF~kcTGu$%v1iKpCI#G=sqB0F?M{lO*_bau?)(`jKJNCN3E4@U9_euV9g0(NVi~cejXECB2fVs+X8JUf<11aYMXg@9CmTw>s{k>kT{>_{q)O zJcF>Eg4@;Y`Al+`?Ew{iAD=-b@#TKQw0SLy-!WbCaZzmS%s*gMTR+nlb2tG1JmF@d z6(?6h&1yW84g5KUI{SdoN^$uochz<60Xh559SO_oIj5dK(-aWeD>&PYy)B`G;A{u@ z2`o5f$Z-tOe!0<~wSOARc1(xX2MpF@j+M~e-}xym@^+4ocGml>)CV|SoVB(d+`T+b zjD`0kTN^*56Yr;7r4MXnc!=>Zja@vhS6{R$+i$G$twf_k=n{ul8zuR_p!@)*q#J?%Y6ea>A9V-a1?D zGsMNSU+8oC4_fnDP9m&ldBueUjJRJ8q?PX_B&3GCA_DYH`q9ZQ2v`vkH4K#9x8@n(Z*fH_swwyzu$R`#5 z1|zAjksLl>wfbKqYO3H)T=1;X1`hyUMjY+O`T7CetJT^gsnVDYgm2@+3Ao>U_5 z-qpP9S)XRpL(PMaa@~*CNdiS#EDyxOM{ENhLon8DV+)I)ifHs0aIp*hD8HA<@p9C^ zpZf94W-CYZzfpaUfYUsU>)f1qJ-maskCP_tY?x5h_~%$o>n9WNoyWsP0mtX&Y-5YP zcx_p?A3;B!Ql#)t#M^GT_mfB4Qs?hU->GACnCsXcA&bm;uW?|Z|w${?#aIy>X-sBibA?Dy4ieZ0v2 zJixvyh4pGt_u`+JYO-%tMJ7d_3b9kENkcH^JPNK;HzRp4r!ZE@=QasNI6fMmyA6(! 
z&@Kt)Z(?n5&x%rUD3Nh5sp+TX@7~rZ+f2K_!gO}uX#Grx9`Ih$GImsN->nQ?;|`%d z?(OZl{>MHImiI>xvE189B{AgV#JZTGuRKuMemIw z2<1pzPi?~$Q4~K>wePtc87#>t=vL}PF%C)~ofEpi#;TLl5$n5$#{p&K#BK?dZPm?z z<<$ITm0`E`25d#}`1xP2jik)>ITb$m}||SYJ$-_&Xkxg_8aRy36WP^*~zQLXWBmmkpF?rXt<0VIIB7i9?ahlSEzQ;wLS=G=<5`jGkS#cG4Q83Vb$j)AC=B2hHmnk#wtR8wX8L#f??EBfhvGpD~M^b88)cMT>Eoym$BnGk;qtHR|NmzhaM`)ha(a!}QC;dUe$P^o(q^dg zS+~SmjMgyuY%TDmt0hAh#pDX0P|yRTmOfHVT4^K0VEbTNQZ#u znK(p?Imk2qP7YjpN%n!3R$DfjtekH1JfAX0uAHk#Uu{Z;5z1;(Cy#_l9=R2|&uG31kAyKg1b2w}8~p>$|Ss=~lAi>ile%>dH1FfOdw68;}JfxUPtz$C$Z&B!5Trw90m z4_$J0Cl!So{2dl!;Ke_O| z+Y`Y8>1|R{(viUnrI_zw_*npRR;@xtrE8-4i?aDEU)6r^`` z(Bd3y+&D<-&Hu>e#SF>OOz~{r6&!)sBoj?NSvc;ghocX>s>#6An(ou*VT2gRkubQ{WszOnIwbg z5iSpSi-=aEO3aPS)%@thV@9|U{4wVM#B`FkQm$EhN=A5svfQ3-dp5cP z1EakDQ<7i`?%&`JvFfC0`nA+&LG*W2k+n+C_+DrKo1kJ#2{_>h6vNBZ>||}L3-gYa ztYn2%?CIT{yK}~>!s`+=8;?sqC(xG_Pzs+f!^nEe49BMFl``K><&)*1uCh^J9l;$CeR{L@6I%~SPn^8fpo`H2vevS4?zVFVvjb<#eAYBuYn=uM>i$%3*uISiWn(z{J99KS%{6SoV1s!gC{x;q69 zbc-hxGz;o)3xAB|t`Yp_Jo&f-n93N`1j+&&UIdsDq7*9Af9om02O9q3tqpE?`sTt; zc{a`EVWm`Odw=Z^jV-*>IW9HtIg)UrAxgaNf1@l`ZPcoU?6Y3%SO!vy64{j=l=RXy@HSv&1`ejg(;eikjSi#gYp z>>R*k8;iRV=*cVNS3lmjw6UhuA1*^hLn@z#-JQYnTPh;>=(E}G5BD`8L|%w7-!;nT zDY-|4OA2u(MUrogpzjKf)2of64l82=7T>%bw^K_TRI^V0g@M&gn*=iS{E#M&Cw>M} zjfWE7j73$rQ=x+seezzXxl>skO!hBg_Bd}QrZ4Jp<>hAhh=}PTZ)tclId-K*K(@b9 zzB+8pQr?>+cJ^eHFWbC*W$!BtqDxvnlVMn4x1 zlh7f~n<{m>J6&%3`?U-FlE`|?FkbWU;iYBz6A&YiasT3DmpvGKMSX5O*!CwD{fiuS z73pCC?No>OUmZq+gwUKrd266rUVF!V!Zs z(inFe7JermwD+WzhAxx;tkpvTuR>TxzhG%nI>y=5rzZTcXDRn`Ff_zY7Wg%$RB)u^ z`MilFy{5~TS0wNheLZjBdFE2ukD>#@$^Ug(#XrGLc?hteg^x^!NHpQ8r+`|>w z@5_DU9HVTXVW;0VpzCTpxxK+~*O&ms{~f@uCmbx^_a7uY_|GEnpXY|}m`A4^D*ZJm zG4F@K6~pVK1%ACk=dL>H1_3#5cw+Z-wgC=Ys6wT8ZeOE^1xzo7A7W?jn-pO#wM~Y znx&EfR-_GAYw}5dfbpZTw95PN)~8r%yf4P7To49EMkaMl{Y3cqgKD1 zUGaBW8An*$-^{=&K=fMy0V2gr|4eb(m;qE6ZT0&&izaIJzjPmBT)mU>NyxF5p8#dc zqP4#YY3g-`g{=OhJ(BV)+S|KRIRm^xUA}rcXI_DQ8_HU$E1M5ZW{FDr4Myq34^&YR zN=ZJF*(>zwzBa$}!PDvdIcX7b1O(1_ff~FCOod6?I}u7D;$cE=`$O8g#zh#AEV!Dn 
zp0=6(gRNfzQE;+&+p$VGAQK4s8u2_MlHF!ONN1#X$>r`xeT~u=f+qS3<0=ZTIZ^ zG7oi?SyB}p$%2{gZLG`QXAY>1BdH+BvXwCvFx#e*ZnttXQ3H&dSt5Fi=RrLDHMZch z-~Y1Ws1no@y16&gZx_l?xrr?V8?e)Ew+TNJGp* z|MWc#Pf$bD*@-ka(&04@i3|bqZ;v>d@TNX|TK zW${J)lS|kr<tXuJ?pGk$}yZ0IQd-6=0hareG@tYnDoM+<>t`B*(Qb6y`gZ6 z;D?0%7=nbk;*4~Dwx0(;w%J$G8k`!UNs+|;3W-4sL|Yl9m5N-Wgd^MVgD0*etJ%U` z=Q4KSwPdd)>-^jfa4yusz6rR3U#pP`2?hGnoElQ1eF?#bpcK9BOPaETIiNIzrebi` zi3eg4qwFF+&dF#L<>e8KArdppG-D3Wpp-Jw75BE^_D|deQ+Y~A&Lbny6LxY9#m>#m zDFirXg>J^D9}(;AC-Qc+RZNUcBm&R|JJyDGzkLKYu#lkdMsf3O;M^$pWcsia|AjK? zq(Q22q8q?HVO+pdGSkV(E&A1nc^vRz89j2M{eDM018%4@xM7e@;lt6$dGE?|UwOu!Bd z_Q$oiiyHS@N4p>g@^-p`O-;XTt^T7dM$;D z`T$gk2eMZoAtdUE8&1?r&<_-x`4vNKQ*s#jo`x!K&L#sFw>8oHeOAJKQx>( z{!VvZJeF6Yr@>*C=7iez7Sx`Jr_GMm5EUUCv|4_&whR>-|2SbE1Ql)XWq=I_)u#hi zPRcFexa%3ThpVSD4P$i<^nH%li()W`rbK$jLW$R!eN7}OpTe=WK z{o9Jh5Rn#jS~x4X$Tq6LdNWSk@wgDG8YK5*f6t#o@MrE+VsuV2y|BUbBO{626@N9iFAFD_IEQpuSuC3s#B%PeNm%D_ko8Z{(T zA`QY7EqojLj67$@WV#VFoKd*$IkxIpzpr4WcWjmMm17S2P3}dJLax|C4{PR}Me0~T zC{{4+1kx+;yVuZ@z-V)eJS7vcWN(k|l??)-?#&o0O$?AgK4MfjtnRL;V0{nOHZnqA zW@2=wZU(pFS5~UdhcqSs2QmHjQirN)%iQcT5^{Diyi)97+|;u0C|(;V=wZIcZ;0~g zg1-Od_m4rUSjkcysSWNk{`0VjwoI&zTnPke$t5zt)GX(oT1VZ-?Ci^y=UZAEJWPQL zH}t!m!w>(0=BK)GPEns@H(SYL^g$Dyt=-xPWj3qW>(7$E9%nV}Wi}pa#4Lxt9w;z3 z2#0-k{H4CAaQ+o95%wjE{TFygq(!`aJ1z`w5Hu z(pfEkFbP>P=kWH^!MW{bzdEj+x^Ug_Rne&RMIe)Ic3?Bu=jyrombF7fs(*4$)(yXB zttPs-9G`zO>JPG7@8&r6Q6`TR zd{sBgbNdjR6+YDPrrf79*$U!Kg}(+R8B#zUlY@R-QK*C!M7>WP+}CU(%EGafie}eu zFfj4?nlI`0cppny}T)0?c_tUW17L@9?FtzdV=Uz8?&&pT!3l&T_0u8gZJ#<$+WI@Ca zgBkQp9^eP}?GLTHm2V;Z#rXSWd0^##@-ntb76;OnBchv8SO!)YHENU0S}R*C7|*GD z18#7fBQZhgH|x-?%Zeatg%JdTUecZ%WGQLvC`&Mkzm-Q&FB&B`A2v9`bedy}vi$F% z7hPspP>yBkU;|#Zbnp1MeBAu#$nKHfdh8(8t9;v?Qo?avuEpNI;&2@v>Jt|OHb_xQ zDpo59D~%r?pu_0Aq#MH&3>m2U#d~PL0Yo)3_(KK~W1FqtI)(KCSNJbPBl#pAa{a>@ zcvm6Vds}l*@##rD|4xCv+i8NRe)lnK+2!%^7tq-LB@zHmR z;mXqT%*@P1co;a?VJgeuLN-V!wm~qB|7Kg@u$)sMhwfi++%07+;!c+aqNZ?vZFHud zlUohj(${HEYB7S_<$+W9OB)nh2|Yu)jQ}&^p>$!|{+M;QL;?aU1?d`5JMSU>!g07` 
z%pLSQEdRW0p(VFL3!Aun+5y%vikage6 zjdSMx96WJ8$+s>L`jOBA>xD|t0NKD78QP$wfo5$_Q`v29)I^n)f%|# zYO(iI0fGt9PaF^oPbiuKTamY}{xnCfv z3`_o|U4TfXJR%*DlhS;*%@4KQ5|KjT?K0)%(3}y6wg-Uc{}}XrqLK zs7ux;NB@A1Ey2n!dcrEUYRp8OAG0~1qKE=-q{EDTD} z6)G5#sF}pfx=TF72f8?Kvk(p&=G=!-x|fwazJ}^EzyZB~(FXCkP1-oPjGiAM`mB{1bz4kn(iS#=f zU*8B#Qx>=0y6dOuI$pn-@0C8hOQCm_F+TW2g99)9X;IENcVmi`9C4n9e`;IHO;K?sBSk`JV`l%lc`yw1ZHpQEgf>= z>Q;dTySWATal;(S$${(Fp%$Lk^2^tI@z|K%$3;c1e@)NhGS0j7(0*IPk5=T5;fm*D zetKN>-HP-CWvI<=OXXYv+=a&D!|on87A0zJ;xkfMiXwD}YvAt^&jTXnQ{yY}Y*>1m ztn7J~?gVZnN2#$Nw;3!oy4Awq`Qj17zCIGU?IqirCO&`F*Jvsxjm#z*_;E^AJn6cD zK-usp;Ip;+aYhq%Cdc#U@vUSCy5zGOG{`Id=aL!45;@IXe0O;!^tn8Vynn8iKms}` z^$vaV16?Bf=mAGcQvJc`6GtZthp2$eiH4pQBhVSyvPjnQEaVS)4=*5uV*8&JyhP$| zfRw^rs_dmM%M_0D$5dFDqm^MDPbb2+|JSj)>37 zVuQIG{#*a3BpGNkDSZ$^?1m;w$#2@xDNnm!_5p?h6TLsi)&&55^No(|Z3u`lON>9O zSiRO5oy=C*Gh7uuXwj;*`6rQk9n-Y^dK?a4k5!4C(b4T0sz`H5s1_100Xg5zdUFy2 zc95x0B3Lmx5iW!=_Pw!er{vF>6YWTN{UR3;z~G2$s-Ea*=l4@s-l7&C0pCjHP~II3 zV2kuniENo+f{{A!hcoc^UaSLS?-{^(KG!9=Ej$a2) z5f5$%Z0{!q4sob>p+y%fXL2XwR8>MFpLCGQU2$S(I&(rR34PAW?h#h}<+mO<$136^ zr3tmjAFhWoc8WG);glvTXWreHIU*`LftIESnyE=J@jVPM(?bp9CxP;l=HQNk=aZ|h zp`*YS?KaO4i%w@m2O6}!q~DZr@K`I-NEax@IGw09oN#B|VZDeUcA@f83QwRBXDfAE z^!Vtm3+)?)-K#*=c$L4xJ=TL7%5{IE;eqEKxe_D-vQ}x{KhVqnxs)xu2!?BR+tmE6 zoy}&?{(z4VHJ35j$)HKkkxUuzdo=z*(eeWR9$vaecHP_Kr-cW1I^DdtEi%GQ_p|XK z4#D?24~l$tigG*z7eD8(Ic6&jwy@5u>MsvipU^^Sq{9x=awnyRf;!_1FauE<%7{5p zpNZpX#QdIPb1LliiVim7_HL3Db6&*?4P6A3(u+rb_E!|SFQW-zcY6=yL?=~F$dVQ( zL-Oe08M>s+nW^UL%#>)nN2imqmrB0(3&)_^Gu84NQXhABj^NP*#Gw zFTNF&9GOl|i*nC$VQNw&no_u>x?jv_ntan3@g+u&&YF1d&Ye3j(As$ z`z6EHwMXKW^@*xk;1NOVT#JllD&0t37|oOUuhegJCvUJmr(@9b(bDLLu6uq!C2!i* znW`%+EqsWpgt9odpYJ-fU(1H19mL5~SI5UAkzDp7YF#@_+*M-y!0<5dfnr@7dUVTE^p%I91Zzx6ueX{gtd_`15rS!F@gpvPE}2Puo8_mudo2pfe^*W3{TPHIXD z3qPH=>|l|Ide@9)3TeW26AI|8$zjrM2`*}3Lrw9Aeqa)s1y(VpuwDf0tH=wm96LAP z(?vdi5AS)%ew3DYO$rJo0tjZb1}OAlc&aUAcsx6U_Nd8_h>@i|xGjDIU1#XJ5=1Wi z@n%K#SG|n=Gcd@-KOtck{n33#5gCVbgrafFbukFa2@Vd+OM}8(6eF&z+AL)m3^c?w 
zpD%vZUC=!Jpqj<|2VmhSRBglmJ{7>3JgKdfWHY=Ee>-^I6@J<>sJ8sF==V7#Ulj5w z3E_PbAOXVYCn-ZPkB9-~m&%bu(jml7uc#`vo27!I(!Qjxakj{;Zgjid3SIXkvBG^rH@W6sDX{EA z1#l*1P_15ErBREK33!*V_41L2P7ED@E~yFW(_L~B6b&~sO2s+?&kQE)^;?UlY*QN3 zq1ZN9Y-zMU6n81OZ1|Uzn_`&rK;G>Kls6ee^}K7T>eVy2G` zhvQ@liTi@Xf|a;D_tSY~SROJEBQ_nhM}uP*>iNb+Ajvi5$tz!wg=9=Si~JU&8RUrq z=Cl+lHC_IhRiF(E_lHCWjCT3(Bh#2F|RYVk?VNC%bgWiZEbj^1K#$%-c1qe=bK5%~e`e_PJO{LaZ9?84A-LP`f_Y{|gL8*zb|ws-28G)KM3%-8f^&)xAJeyxhGY_fglvJ~z9|FDG-&E8aQ~n8%d9>XN2Z%i~;j}U<=Lf zQ^>gI$0;!Hrqhp64twguf?v+$e_EdhKqo{2c!=O~8>oQ1cVXiA9rotZMQO-xw=_1~ z_mEf48>if3aYSCnZe~oQ0*HJMUD)?r1N@n&qgEa>gp*VC_$2M?TPRWDAMJUoq@hDG zucR?8K}b2B$Z(M-Nabph*scy^NOn3W61&L1Yk^G!{f^8nc?dHQ8ms(+34sVcs<~ANh&a^2 zGjRa;BzhtgLfTgN0EhtU%rz+Bk^FD>1*96OpM&fTY@QPtZ5kJRhZimhw0xbu)FIN=JlN#Q9>GL}5BLpX|# zWTi3o0@FD_ui`8m9Cx#!ggB_UmimK>M{3;vHXKfgZ+GFd4%P}grVcb!f%>nM45$4m z(Ra)+^&TqTU0*-upWK(;M=@@e^oa-$GFh7xxTGn0w1pepluHMwuYgOTniu9Xxw*Sg zMb;xHI0kOKmfXN#&POLZQVd*{g&|Gso^wF;6V@S%V$Jqu^Fh`J+I!N_wqO^TjlFEd zO${7)pV{ji8XeW~B~iDo&#J&z<=?Tn1T{0yx~3#M**-V>s6{VIm3!KXmV2l84VM({ zdKP0#;`Xb$AhiQ1F*?G}1*DHeyy!L9ms0WfU#O!k_J?S?MC8$KWRlrTP_>* zuOcoOA>nx=n9>@5Dyz?D?rvfvhS>zfu~K?&vs8)gVo%<{Nw??W%1&^Npm%Ct3A7ZO zb$pcd%Q{Vdo7gX!QG8yAuIWG5t79B5EBXP)+(<%r*>I1^`|@B^1h}lK3T72`K&f21 z0WR=U1f@#9pWiQB9YD4_g$j`N;)dyblZ*TeD>IW1x(i}2;-@xM%`WDuuR9bcLP{Py zqw_(WVuaMr7G(mw>V)mas}103w=`GtvWaayP%vfRrM5fKgE>^iZ_92^7q#~%(>OXi zSBIxdPGjgYr`#dSTmdvLhB~O5;FU<&6}1|f=$L_`?JlQ$vhYD1*jB_|DNA^QmLgKW=1he zs5H)fd$|aF1nrF^xz4G}m1%eV8&$pZ$}3O@IHD193dGX@zA-gQ!Bq!+5X4ZNh?cNa zCALgb`1%e9xV0SE#paRp%%hUvF5j}@f14zFv$i5+|u&Vubuq2-*k;%NG0H!<-#2s1dXv zDOuE-e@w6N{1_Akz3TR!q572~bh#|m=pWPjwa%`el*D)Qb;iqCf>=K|Na}4f5u$Hr z1L>+=M{rS5{>IrKL~8aHWKHF-uQ*fThpqIKID`uGvG#eMpf`e}R91P}1Lkbbtyywm zk`#iYg~C5RKECS(L#KWP@8_$*6F%)R&sT}N8Dn=GG_{0~o{Tq|Z!We}Q=}qYJp<(B zPkpLux+nUbuu(ei`fam}$YP{K;Z<@p7uGMO>Da&MWc_<{qoE?a@#`nrXfFiUj;A4gy6!m2hdni8@HkNK z8JQ0LYmS7^i=xkUR|4m_CM!Bu(oq)y`tov3XwOzn#NLmGK-%)B=(W_cinBl&$3VT4H`Mag4`1-5OF=JL-*F`NFUf)K&9UPCVw0n@TrmU4 
z3nhaoU-e~hyX3|=-TwUi;HVbCmL}7H^{(Nw$O5(;r%mL1!vxa)Ri;;N1l{*{e`kU& zRs>*;U7*|KaYLp_v6qBG*i@3&q=U|7QSu5nBy|*7Ko(dLaL#+v$Ho>#_jLiCWm{}M zcWJU2T;^f_CCtn;+Uq9WQd~NH_Z@zT*4yp(?^xC#v;wE@2!Q5uN1yF_ zFi(M97f)whRjm3oMs2-zrZv{x2xIqe_qF#8*F_b{#}D_`R1C{Poa_arU;w@f8((I`SKcG8 z{5aAKlr{VG^S4*>%#7>uMInCec_-OVZA?eZ4}NwZ)ExToxao@mehqg6_u27T#Ex|r zM_=A@@8k;I4n*~9-5WFgUvex;rbBFb0qN9)C(&<4MLqTChS>i-e`mu$Sg1+u%vt@U z_!2W=XWLb{W7HE&?Uo780}Pr{gmAUYrbVo)8-(*7h7@Gs5C_e*yFa`<4BBStYA1gF z3jU?!?-xJzD$uhYK*X8;Bo$*P<`ltQa)+mI7e$> z{_U4D!dk@xDDz_PF5(vHBWSrZ8@;i*0lpQpt0t?`YQ zDHUh3cfs9~P91k`8+`V=C^_-DptCYD#N6tCf5ey(a-z1;3x|-amE*A2(`h=M?vwl7 zl0RS6((dIJ+=Z%!7&Jml6m3X-9$N`wJINDoY^zt=E*xypma;pGMz=btiXhrIZ_u3> zZnNIdRKB;NI9k3i_!q2oZB9zJ3f(}Qo5{6O#B-P0T4t-WT3+nQmMfKxxzBG~0edSa zs)_QJYJcGm_N>gR@BIHwG#^~a_4G_)0iL-5z;0@^M=sl(TqB(r|D|NO`s74pi zi}Vf`r@r2`A>XK+7-P!Q#A3dDJ{4}0ZwKBdX?mM%z1dCD<>)>TG4f(POOR}x+AOm* z^c*ZQj3r{80dfF@R*`Cx?852)JrI6NH1FLw4tCX|A*A4L=pTH+(m7?KUKWI z(hzhMW$j<(D1AQ7KkM{Q3W}a4O2TRN5awYrcGGC@37lgChS&QZbTN+q9CGfP(Q^LD zzoiVe*HaCX^Ca)c0yk2=F50u4TvxZX0#UwsY2;t=+wje3A4jJ|Yn{KAW=1{g2 z|FJ*l=d*Hs4ON%mI}B-14M3vX<|LtS5gQrGm!_P;on^$tL^|9bGmLye*D#Zb5d_sp zx)$c~P?B@egZdga1)yxky440q+}cQb9qx&twqXEbp|6*JvaUah`qehpodbKAh{!>* zy=LbZYAmMy*Hz%Dj4&T4sEXWW8CCCh3|kJ_rWz_mP_A-PCu%Vp?K5TJ9jilyQAfMd z`R_fmfUNXck_?=xWW}#tvYy2J^01i*VVY3SL_&!)?BrizNPvu0?{2Ap`vEP7+DHOt(@~OZPMbmAxpH2GH#QoCGP(V7 z7s0)CfGuc_yQb|uqT9K+DLq!|np9S`INTDujWzSZgI7*cHnZhu^2jbWeFk>?sx)!! 
zs`F>%@lYZeh!}AO&dAVyu_V$=MoCKFS%7N505H@%mM!flhdNJi@0zQ5vt} zenG?-H%R4+J}Sjiqe7uU@qxFvV8b&Qzb8tuv%P$Ok9zz7;nyw}!iQeMj&0eqdo`BF zUi8o46cyKQZ~*nEls;4}BQmcr&q8n`t6_`UN2Wwr;H%{Oq}!xvJ4Ci28;~iI!b_ne zng9=TbuwN3h{Z#@D^r?2y`ojJc>*P|e%KZ^l_tN%;SfS7my_xXLey2t7;{Q>Nh0XT zIFB`}O9?5Gf(9PROhBUYB&NHoYo5I()2_745s8UB9g&-4wqz_pv`*V-*j ze&Q4dQ>^G;AW=7MMVzg2ycrCSg}-RIba{6@obXk-IT0)81e>Pq=!U6qyHM0)o$Y27 zKfTJWzHoEJh0{jj5H=$tm^3U8giA?_7=-*f_QEJ2H_Z$F2b!gn@Sm#I*Pz2*gL1IR zD10!~NvTdQ00(|o%6JS>dNT_mxIJaXr}8%%I1~Cf@a#*?f8T?|sy08PHw6&~{>(m$ zMfAU#RWJ%bb{Oz&lv_~yUh`4bhOPkqOK)1<`9#WJCb>rp(huA&K*^~X3IekStY-Q8 z;_<{Y&z^Jt%dpfiAy;lWDT`h0BmqP7z8-~k3!#P%H6+2gWY{{1lWK;8jm=T((`Y(9DHUE8w?UQ&p0wJp&j>R41dwovT^2~r>G$XJG##FAz$X6 zE32tq@?Eu))&l<-D}@#LMdBw*M~iD5FKUf?C3FFA)X=>qYyz0Ea)GuMxAF}TNm~Sg z9?ZH_KSA2LFV#q4NfAWD4IG__^|PIAvBGlO35C`mYiWkNF^bx5Bj zLibJx!KBa$X9pBWcjzY;?C7gmlk#+<+Qqb?Vo2Lz*3*S!cy>qHT$1^mqG>T>&oOd4 zN+dbO!){yM`U>`O_Zzr8uO)*}gE#c<2h1+cy_XES)^5;znFEFd>-p{o7q zqIkc0X&m-*?deyrjYp&Y;_JGIrbTmKAf1fQCJx9D|N7%K@^>b41ZsP6Vm_||O$r-) z5L=hU#7E_oMmYibA+yuk8ilrZ|6&PBUr%ZgYp{PM^nm~^h3uGThSAF2uY8mfZII!G zs`Kcj%;5`1$PInzw(Ulc9V0%mJrlRB8~I9lW}&SNvk|3yKztH5!@Bz-=%eQ! 
z`~Pmsa`IGn=a2d}V}p`Y60pDy&X)THKkK0qi^C}ia#p4yyi3j8^O8I*9LgnkZbG+e zl2_ie*MGdN2NOg!E40|<0p^9CuMcvH>~pON=iT-J{th6iP!*Ty&_f8-V;XL^66KT{Mdj11svct@h&&q%iOo^pmb zpV5qX)uQxv+frT!0#v8RHKxVujs7-NZSTU62o(J++cuWPugcBECs_r=e3>hOSRsF@ zbr#i=CBjSpk8;lSfg&Fr7_lOe8(gl|zyQm!ihslPfge#55}IUiel5wOo_Wn3Fw=G_ zsZmj$mKNr9G%m#0Db3zp&^0o0@p3N$D8Bq z%{7wI5;qpDTjg>^x%NSoCXRC}s?Qtd+zyBHGe!2l5KdV7(a$7N+~(QxWJ6iM1%EKw z*uD_JuVCvO@NCObGNJhfv%lC9VjN4(afM&JNBkzFw%X@|rFGLrNhLWdmSD$Xzva1n zI=n@z38{G!7cuvtZxHn;Ma@tsp=8|B>_n;$YP%W(RN(BXzq__V-Mvo`#%@E@LcEMY<(dgWb8w}4XC0X=>V;&l1#{dJ;I&!88M6USNP`NtKoa+^k@3JVI9$v- zr#&d_U~REm?$01DHTzb!m5W|2yA-PzS;yi;e9B{~LeuLU!SKa`J~;b2svVkhzgt+3 z-75@b)vlVwH@OMIv9@X)Rj3oS9(sk~UZF6NTVH zfTh?Ka1(;}&8b+u1_93RDV1<*u`(N|?Q@Yc(o#m~FOiyCu=`57sM4`iOQpp9PQ=pq z{SbBVoR`hBZYkS4PTjhRZ7?GTa^Xg0k+ILvi&6OBtGQeXZDm*BFU8E61d;rXo%+w9 z02s&bzJ3q)wmY0)xlpNZkgusK#@Cb?(_L-XhQL_C^3hPO8*9MoGAeoitt;`Mv@=T5 zV-3=}N`SH1s)q&Z>(qBW@lfp*79*NrjH>^{-t{^;^IMm_P`|X)tc4H*5Xx=V0S*Sf z;z86FvyCnJBkR|_!TIC?19{uZz#2rd&6aUEvJrr7PP~Pp^r(3g?_kr9LB8w6y8xi{ z22$i#I|khDF`mk8dqD2D7xdlqE9+sIk`3$);w;ODUc953jtEhYRqx@^n;z&nj$W5E zJRY;O;-|8Y?nZK(8lM$9%mfieRQiN}%xhoohA!@os=^QbPSENb2Q#fP->-XwVvyBh zV(g3nhfG+BLTSi74s!kICA}UGW_i7+8hm@o`RW#_u;YJq2v5gjT~X`L+F*5(q3lxr zBLu|Ch?R|B2D4xUk-&Q2lM||ifu3l3k8;vL!DrN`6Rp+m)Z&<0N(*6>`BOhu_~Y+M zNW8PEL2fx=@i~N0k%F8mBLn(APZD|t<2w;fq)TYHlaGH{+XyvI`BcaIUOiHIlTg{X z1~RTk6!74;PsISgP;|psxXe{m#SnO0`Ip%e)tIEvo2Rm}B{NJr@PJD<6|J(X(V+z^ z^5VdhAo;bA`{w-O)fI}sZ7UX`#6GyA47bd)VG z;NQisWr%)hP%<^FkMm(L-~b)^89y;lUzIpJaW+0wT*c>Pw)W3xWhenUa zm_CXW=rBx1+n>^Lb(Gw~4G3kfWOxz3~Fuu>dDdCYtC-~hW#3=HJJS^H7N9Hlj2;< z;BIFA4^w9y6-Se`aoin(Yj7vHySoRMV8Jc83{FCT!CiwB+}(q_yAJN|@J-&`{dP~C z!+)IVuI{e7_o;iIA4XYH7zB`zSZj}PJb;iehayOF1EQ)DRzm$$pJHxHoepnQdweuC z7jje@){g9lC3Q?fc4;cUHbt<<`M8$tnT3#5pv%dR<&Nr>6 zo5MV6hCY)q(VqZPw1-6EACJ9L#KBoC`{Mdf5X*H}XpfP*ycV9QPBeLxl=;;NC`TVt ztcn6N@f3z4V{x^vXH_IhgQ#||XU%N2fWDV+@1VKQ@3cY?%5H8n6FU=p(k{)AD*5C2 zi0U>nEEiqc;f}ynAFIc)PraLWZi)FsQJsP}6IS`Yd>yd1r*HwL5I$4k6hmq@`7=3t 
z(3*3cIJIid{h2hB!D4c8M_)}QaS`j-iemLK_Ra+lzK3&TY5oj1A;^ZyL5=Ai8jPV- z5l}>XIF;3H$kB|bK5MsGvN@d2lQnAE5o>;UPyj_035Uk65sr4&-m+7X2<%#zo0bdM zG1fTcOI(%fYta7oH==eFxs4@mc!s^Yp^VQj)(_N7@G9_3H=3kjNNZ(OuXL(gc!XrEjvsmSY)^_N4G4=-NNEsl{*y?ed=~j{w&1h(kO&){tHk{QOHmE zWrLj?it|E4C7@~FQrTbj%q|VVSJ-n>+310WT%(I|c>4)B12L(7(7i7UF#%;l+70rw zPIP?2`a$!Kf#`wpm-|&OC46G&7PccDw+qUF!19HIQOeYm32?jE%C|~|Nhhv*29Lga*1@F=2ArFG5 zISH>6MnAEeD7!2BZn4OVw$bDHJywNd42ux*F1%-G`pq7m%Ocr*Hy-0B2Z=O$aylnb z89m&Z)UF9k6wjO>xELq7J&LFAtT!#~fZ{zXHmSfp$q}0XSpqBBBhmnUX3YZvLk>_6 z<3t!``oQiktw>lr5&qqrc6U%aQN}4#5)o^Q#`W@Uz*NmjlOYvZ7{CDOhlu1P;X2oX z=1>ut@5lIcZa`z?Dr$c)m&vS4i{;RC^G{I5@kDGea1EX?hzmmIF7R7#F=WD_44e9g znCuQN4WGkrQ$L;hV$BMwiiwtC!ypfk?X=e9K9z1;J=c`e8EWw4C-x=IlZ*x-e$T7R z;UIyT(V6maracuHg~jaRrE`Vk4hUq)BN#06TvEC8i$_I8rtMJi6%ZbxCcsBN!W$%KnQZsC4@OR%n(}fx6@4uEGww%^$@CiJF812X zDPW@!M57Tjak-+GSIi&S71nau%n~~DWSt-K1eq$j^R46$lMJn0h{VN)!$E_~^=?0T zrRze>&B^D;(<4(hU9vcOM?lHI6N4Ir$deI1YN0r?s-8B5n}tqixYO>#Z}Sa_y8t0+ z>KvTwO^`JCCs$Gt@pMvAz*;Gx*Xy=KmI^*#I`^Ey08%V@5ZC=0_h#_RwHu67z z{?ynlu~!RRUXoDz!5Ov24C0UnpxcIFw7aBFJ$d?khpu zJUEI+c{qAM$*bHwr0t09S+D9JXk+WuU92m}!Dr9~3nMe7Q+e6DXAY3EwMI=gY6do)xM%=IBr-AIi&S3Ss`Ab#-EJ_#*6X?y1l!6ZrD zA9h~vQ8Nx`9U@WN#L7#L?4&g6nZkEERTj~)Y;<%l>bUmaifJMZnCFlKgdFZcMn?AVI1?T;7Kf4!tMVxsEm1mXw z@$sUO6g9H9mmTr0S%4N7R<=c{HF1RP2Zs+zshkb13ATa7!{$5}|KM=P1)-OXsp z(l=W2EqebZfA7PSC6`Ox_o76D>KJwNf|t9VTJwkEz$O+cEC{i07<)e!u?nZ~Suq4J zI_4n`+c-2A_YGaEGag$3gQb5S{4LXbA9@(UHGC8_KRMQhCdt%w*qFi{V;cbYsy_;!#Dkz zWDTholoN?rOPJ_B6QUlBkWcyWiU{_i#0Qm?@KhVF;JMBjD|^aP_cb)kY`a^wU1Xv( ztn^qkg->MR6?OVS23_D!d+8V$W^+gjxk`)^)#)?bAu8f&$#+>c1A2&Oo!%#I4W2A2 z+6&w4gAJw+2v}bk{VEU3aGnaED#*0Oc*KBRB#cxpM&kn^`N@|O8TZlJ;Ma!$9K^(G zb`($hI2&s|vr29XPid4e`!qZB+sl;=ks$cRQat&+81(sFe>1U zurXO-JyhR{EV?YA(flWSEITq$iB5@}VQg(7=09LOprv+QS3bsPczv+Lob24hgnWf@ zeF2-idrpQ%|LtTrT}^`VW9+`3nVXxtTK74pKbXmJoGn(<`{kKq<%@@v3}r_`d6_Pj zc3f`#n$=IT$+js|y#1Mt3SVPQrE6c*MxEiH)_ZyM0h0H@RO)Pq9ruuw_p2Oa1bhJ$ z7K3JG3L|&YS^~nYh=$C}PM;NxZPUx2ny%THe7rBPVi_)zK8%?cB=5$oWOCzfffN$sH=+Q=( 
z2)Zi}9f*Uqg!FQ@u;BlC{->sud^u|bo$D**wmeb7sTpayfle?t5{#ce@)v-OSQ-)m z|9NF5@@wdfOw7a3=+ejfsboWL{qKLP5K=F2`%YJ1!4CG+BgsTO=`gQnYoB-!N{0Jw z_qUrT?5SXR>0G{o0MTa&N#Z6~9~a+=X_k3Mv(v^y2Sl@D`O?xc*Y?DA`0!Wq$O6R3 ztG2oJ^Istd2n|DLMKOSRLtZw7Okwed2xf;SLS1XUkG`OiJy=e`{5f!ey`Yl;nT2B7 z-t|u(Uz7r<3pIuUW8MT{p!OEOh#ktyXs_f8R~jn-4@IlMxBKHtFYT_Yralo_o5Hi~gy+We->xnhfz3G`uWx$D}ZKBnT$iB;B4LA$l z7{6)EaiD%8;IQnIcN(M!r`yyoB){u1q~e7AwnQ40GgxIR*pC?Kx(A~l6h^T4sI!0X z)=qGa&&S_&{DT|`LQQlN8m%QkdaUJJD0Uyvq=YPfdJ0%*GW&J&p84Rq8VZ%#`vlLC z9IcDkLL8^Zxm>?(S&}>NHp8!&7*f2zFUl;y$U-ye!}<3B=27M2>iHtsB;$}Kq_=Iu zV=~p1Dg3%VCN8;%uHST%f%7n;=(B6Szw6+x|1^7x5WvXyn7DVvTdTHwWr^D5T6rdf zlS}q>iGZgFxuGA{snQy)C11BDvd$Kz@*7p(f^M7N2O&7CYUOP7SYt!C<{c8*Ny)yH zQC?Yk)gyS88a70AH2(PGHnb7|^>n`s_t?awCkAO--1y}WQuD#DpXOZwkZIlz7hB%S zzMDX@Og_%VybqEGs%moR#N@%ctrN%`qOKL%3+Ec!(ZU*s#sze5j}#7e7>$ozjKqxs z)Pp)e1ikctV_Qu0j1EtU!_rbMkt4cb`9mW@f{!Kuw+i;IiDiO{ z{b)r*YW-WS928i5VqU@?9J>4A<0oEXu za=+?AI8s36amTn zyiVl1-SBFdvhZxH4ycf#xtbI)UpFz7t;wKis85>qVF<(eA84XJ9c)KHw1wcy+1PlK zGq+Dp&5%_=0dp(|hGb1?jy-v+CQ_GSe9DmS z`&YU647&`+S@gU5!= z85a3gb@Kj|`s2@y+d#FbiAO(&ZUMKWS$t~uHMh_0G-fdmrEfabYoU2vG^$<2-Kmfs z9y1t;#agdQADSu%*((3VR#;K(l6$e27{J~CvVUK)2Y5bhEC53q&;=$`CV>k`b-X*w zxXx?SqqvOG&rZ{-qESc`=IU3dhj45<(#e)oKCGGj2a*BD#&R<`6*+PeU}{FAnZ_T5 zD|bY)2|OduTo4FU?9l;&@{_kw7FvupESB(#xDrl=nC1~>P8R$)`m}DRhmG=C{CPqP z*&JqBeXKJ3uamew7c95|aD421fQAI@WaVl;1bb$2wZ)bQ>(TYMFhcTh^AW1)kmA{mN+Edy*@Rj< z?{EfDV0mM?55|JxZvkVy1WUqP>ZA`GGcFA}M_}X|2<%Be!%3Uv`DI!!o2!D^a(yV) z?r0Tu{3y-BGF0f~9Kk|)nrM?mU`D&h>aPq!@iFlV~Cy7O&)7iEw!b2ZxAW z$;dz(_9^8X{BYZ6WZqq2Lcc0VCFs$Fnp2-20NRdxckg3-`^A|SinpUKh}l^?44XbI z=f*6dVP}Byod#NNrf`0xyFk7u(n0-%qgT5ITDy!H7zr?TIQg>Wfyfgn1`5ZdDIB)G z=G#eSuY-rKO?xoSeXPi~w`n6ET#C=YpPiEjKV*dX6o2H#+CMy7SaJ_!-J9?=LLdFgAVm0 z?vIi2br1g5p&_<4bNoMMV4Eg>WGC#yxd(o39?=vFrQ4;_r5?o;!^7-N5g}>Br^!R< z_3D_TlX@}rQSBnoWM6dsntO@?ua}rZxB9)I(e4td-LMUEl=iLcz0h5IjfE!1p3y!M zY9*~w>oA^t)KKi3RSewjnwEm1FoX#FOQaQqD{ql{C2m0Qo>f~jd2L4=dE}7#dVV%( 
zSU6w^nUO_eKPmqnjF>xK+Z#(S+VgIvM<)^BdvoWSY1l=ikyHI^V&|!bi2+|_Pnm_AeRS|hPuFK|6qgF(1>sO*>L>DSCle; zlvr=doIT|>dmh;1M!XUpMlF=RIH^UNx$g;kE?G!=4KsFEKJ8cCI6yqTTu;zC3fnER zk!$ zK>SY?y(9{eWby2AoU3fWrZI3xI7$v0#Dr;~mO09P3nv&e<|HiwRiK2i&(PgkWE1d{ zWV46CpF0lh8~?Z&j2`E4wp3g6v?2lhC0OLIZ+FMzBdq+LXS>FI=i`z7A}<^kK%(or z>CUCJoq3PYj#jCRP5hZs&u1fuf+apVobyYZ3-B1kfNcVVIna)^b2Z_?j7x^m)dNwZ z;>eg|CTK=unmO=f7f3Vvo_J?wUD&xvp6%x3kF}l!%yAw*VWGs)sSj8U^WoTC&Ez=uq4N1NXv?{2kay+rXmc4*QR*GZMb~cQk*3REk*E zIgKuj91_(4af)YZ5738qUYQcL8j29@^3qr-QbB78VwS z(wmOxP%C>za$yUXt>a{Q3uw?er9iRiM+TI_UH(hEjq_TF{6SaYm0ExVDkFZ;vi!%gKgKQK`?~ z^AI85D$A=7w7YwFTd^9@bB*&CJXY?AHt;OA(*G3l?MRR2cT#z5a+aSK%iBC`T&5B< zQ~eWP?3t1$-!KVf2#fdyx;21vl1vc%stUroL%s#-&C%3CJjXY8Y5xZB^03{tZ^5v9eAKp5l|YunZV=(@72T^GVP+I6xH^0^n0i^ znt(&Pm!Ol871MO50nSKGjbg#THEPxW`E)u@rkP20?yAT0cC&A(%VU4>mf&=~Tx343C!^%C+;R0&{f56sw=e+>u4AXT1~uf3 z*s3HYjS@zRneFZ)*rG;bqE$N#wIV_5w{1aH6WA~M+g&p=nN>@2Qqlpn))S&4LYIRj2Qy=W4hdP>y|?*rSrV8=$G)31STZ%r*(?m~ z-lMA{+HmHIkN0uCzR2DQk53{XnzLZ)7I>xc~3VN%L6afyk{ zh)gp(O~M1u*REIQXE%Rb-X1rwbm`PX`KD;UM@PZu#h{_e$NXz!NS1fHLOekB|5ox{w05-v%;0Yly2h=wJ_b!F6sz zfc8eIG&E9n9VBEV3ZaGJt6fI(-O|KCiE-xLisn0SaO)?4B{Zf6k-4Rv)~}7tiWeKL zw&~PU8xc=b=0_HdH&FQLEAuPZqCOFf@KT^YG;GHtxTwwYB^5?K#dra8eBk2ejKR>o zD60rLBMKU;Ajo2)&$NxJudaLt2d`AxFj9JyvtJP_UW;(U;PhpoNKXcAeJE~x_USjl zqw1AD$1hbcDAEK;UGnM+KA6v8z6@Hrs;{kp7X6XnP9N*W$erV36X&qjg2bbi7hC_g z%9BT(LoC{l;#u--Id7!6)YxNwyUZ^@qf- zC@WUwt7=J$x{5yr&=FOwXG#`%`(71kkQ&Mq9_5~N+54C7a7d>T16$^1@-0=Iq+8QT|ti4*nzB;UV zBkZ%xI`+j#J4i+dd>BTrcxleKt&iXBbL}4t=zgUV!DGN5y>XPC4fYCvlTGDn!>bt0 zQBZzzO8a(dXiVpDo*$8X?TQ1?n7nNeT9VKbW1 zHSAenE~x*;^7%M=i@ld`dZe-G#;LWzC8eUkyN+8pW8q3CWo_iB$5p{}>*0p;(4w#i z)!5N?$=a)Vlo$DTzbw$M^w~De{ID(YYFK1o$dKn%NdwP=DYo>3md+-;YPfb+WYX)2 zm54%+FgD>qt)t30QGJfP(;(eNZPbMy*B3hxWR$K*-Jgk6UNOsuq`nu*e5VJosw41b zSDuWIp(ObponNBJT0HWI{a&gg6FxA09zrQL@HA>N$rXGA%m?*K)NUWfRp&D;&qkL7Q1nM>VZXSw=$kWNM67B{vnADxuUUgyKNu>xy)L1IgPfyM@d!WDz8q-{NUTT^!j`i;FF zd}S-I(-ZMEXIoSKpN=JENmp!Zdp*4tb)bzTU*-aPrIdo|+BwSCZQzk|s 
zU)zPldleP3Xh(V$V%$6Zj-(!@@$p0J=}Kn9r)I80aqDlIm==?Z%7#kqWtDoYbH}z8 zYiYMvTLRdi2_B(e9&|@KR8cAZ+Vw4Ij8AjlRakYjGx=N#1;O<){8S26Jh#Hn;?VCm z3=PT0qoq%mSiIfHHKhG3M7tB6ub~E)?dgNxhWyPs=zt>69*3}zz<^KRd$f6QXq6u{ zeqiW9$p>kU(b;0ihRZKWm*}+;YJ+Snc85QJrBnh4#H8#XrlXek;TZ-c?z_^KNuz_x>kWWMOr7LQu+{=8<&D3&t_pIS;Ke6lR zCw`AWl<}+Gk=vb3AEy2RoN(zp8`N3Lz?Ju$*8&-@&-5&(0X)z&kmtJf?4DwG%1q@V zK1iP6;md*qlZo{FnPot~&Lmcmm`C5HK>lZx=}R?A>1+M(Pgy`j0M0BDGID|KLIu0+ zJX7;dv;YW9$dK2jh@=?maz7d>?o&yNxjR|)V)}#P%Wrgh$fqu^RaH4t|1>7L*hLCw zGJ08XfR1H>LTnH|%|bX=V%v;D*i!j)Tq=djs?X9K2zGu(q-=T!MkWkwodgkx;9tx* zp_7QcgH8CGxtWBKiKr${$1b5w+W#)9nXO$PkiLQQ>Kf0>FNWP!mb=~-4H5DbyU931 zM_j)_!5-@UKbKcHo}awIw|4@E6F{Z2{yTlvB|1?jit}wb-Kn9TCsA#)s7^)8p$@b5+3XRbjkM%RRBs<^myp`(cIRtBz7#0r@l^R%&Gkr<^gsa4maPQ z$gbq^1@n?Y$i@#rPb;nR#$>-jB`gEUcr>la+@B7K*qoedkgoRuG{5J6d^IMn_TG8*^Kspivg1_W!E#JBBN0w zjxO&GdWhLxiTfYG8EQy4Z z5WpNy^lqKbWlw$bUzYl%JC9H0Q+9y8#3s}6mgu^>ZpAlMuAJrL6Pn@eWW`hxa6={q z1tBnY~qG`Y7FtbjW;D=Mj1Gp>mO>1`D|1!N7d^MB(HY;S{#Kgi9 zi{)#lYXW0e0J{NQ)4N23i1i#_BuWU2`$bE|kYwH{57{K!Ky3BS)2h1vzVbZ=^=e-e)=TChm3yYQ}H_BjRgN5fhM@f3WH4nqB$vI z(GYIOM9(Gn*43lQY)%{%~q&xA$?7Jlrw7bB>`nSZS<$0nv)Y{_b#2YR>7bWb%{fmllhhbJMqH_H@U>;JksTHYLcKg{z)gz{Bd}E{1dm`DojfX zma>$8vYVCW%0_Lb>gK!Oy?~9-7*omX^&?=YSk32HQhnLzeq4h*6H3W|E5VM)ft^cC z2)=WC(Q*|c%MbV2{7!e?afz>ebZwVE4@;!M208`W4SR7C0_UeGCCm@Ec-zi*SKX#E zZ7_c)&xKo)iLCRcH1b5B4j(Fsc$2^Ug+7`nN#f1}OX%aY7|AEJo78ZE%N=W4er~Fu zHya(A{7~UE@;y5bhr6m)foD%bzTEh8xNir`OJlZ+YuRfe#(Q+C+Vg726GcJ}O~d|) z)7U;Tp5RAVP85F4_&&z*%xVU;tnT156tBAp4?+8FmDCd*a2FJM)ti1Ev!fy) z7A>-C4(p$KndQ4*$_R2q7or0DJ#s+Eky(Bb6|qFSFb?%)L#G(RIG6E)76A8{fl#Pz z??_`T!ugl?-w*PDH(43f!$PNr?j8>6T!d_k`8zyM45npxJC*Q56)1WLdNFRhRwpC(6LaH&q;O=PG)!WE)!gN(}1;15>*_)V{82cozZt50qRmz9m| zyD@X6Rhtda{N$40>DQv6bjJqz3HHy@S-UM|`E=>HySg!L#M(H2LzeQPXhr<=VrFNB zLg+iq_G0&syNtDfTpKk*v^GLHVBx+6Uj;8`uYwkDJb7r83XY$*y4_XYE3ouMyl*I&=tbyM` zB7ihsnyH!G-9vYdYi%SXx~d6^;l4qq(AC4rV)kSD-ETo8RljC(ktQkrNZyfANXn-n zpW^L1JB!O9b6V2(HEK)N^r&F9{yF3a%2EoP#j~^jlQ}e6v^137%N56gg>^ahU#^SOk_3CV;k`_CX 
zxW1Fvxo?ZDNQdG1Oauv~Tb6#1G459od)W|(R}=@Z#DH9EnVJ)n)q@Wq1pf*vfbpk5 zQzfo#V*j$L`vNcgC^lyTHLR&&ZPhfK#L@_G_m710SH@Yi#u*g6fObP1QNrIDJ5&9L z-4BdE{Fyw=$SO>G?$4%6qx_(Q%c4i6lO0bbZealkp!JaX)$SLoLgSm$X&%(&Iz+vZ z*o!8h8X;d}v6=Ixz67Z1tFa~-SHkFvgI!&s5C(?b%3Y|iUbq@y-}Hj(Xgst20&628 zBFsU9u{Lbmt~lqlf1`wd4Ldv}@I%z9>4!{nlsnnULF>1w|t%!d;o+`erIDS}I9S@qaBzfhni68s9_ zyX(^c;dMTi;8>c$K@33txPsiS70w+ApqAZz7ZeWac-2t`S3=+pWIlrNdz`TTnKxn1 zROkN^1O5?5aADbzUc$;gMYcmf1-7gPlTIRwMqBL43D2M29xae{LO#7HUGhKnCvL*pGL^HWX#u|P}$v<;THpPv!kC~zXrw+ z+;p%=)b8w=rBU17hDM>PjTsvoqvvtGEA5eR+oiDJZH>DB@8!_IvLnBEvUjz!j0BI9 zC_Izjn#ryc97sfZli$%|EQK_c862*b8yrskRsj&8{+1tYLlbBn%jCi1-Z(bFa&KsR zx#|Q}hi4^8DeN7oaKMDuOtY^J%gyPh-Q?XOS-oWV$KNUKzAxKW9-gSmKdv548F=z` z^}PHI8vZp*WYoxax+>9sJ-IO*2Ui$ECNDT;#3~GBNE-!(u66VBxvK|3;%EK=@bcZWBC2k z55ol<94Gf{CoyLan^=Vcv!h2I&;&0;Z{z9N#sc8L3t7OwM;BV5D0uN=OQ|a@`uPZP z6=E0;d@5$M`+dbXW9?0nJfJGdq%Rg{vb-qk8lh7&&>9U5SLKK9K%&c(V=#i%p;uvO<6mOPKY~c6QeY&M z2$`~lqkIp87xoK6tMUiO%Sl-B6l;8|GN|#0#46~nOGw@#{2W7(MOaZU&aDZ~(WJz9 z<_|(^Y=Kevj9n7L++9`#gwiT-Yuc2eTY^D_F;bdi~ z;61hwe8uhWk7c)}=QI_V0Vzwy8v3;LZm7R}o*_1^A(Jcvh;~*d zNJpW$A%iT8>vv=<$s^<-m6W^~=E6P0Pz09>rIsJwmg{Y+`!0k_%F1RX`$z54dc*(o z>4OsmPb71l?IJ$Ih+9d!$a4uvpi~W8s>tM&6(ySbQwXqs{|D9Afu}EwFlsK!2?BF%ddUL9GK5X--C5{?ULU#|L^-TY&IJmngli5nQ=f zz6vJzf{;!Gn~Lm4%^d&d=I#G3JtPp&pt&7~54#Y8vu|;0({^A|5XBN{VhUcG3cGta}Bc1oJ(+zU_UZRFgifd>(4fWvxiFiBwc&coj* z`f41^m+J&8wcR+*!RA(0&(3dD7HtFn`I4;E$a}larLpYxghMN4Wzx#9_{hr8I&Z_E(x`oQq#D3KgLDuo@;{Re^Y?a3b{&HvvhTcpL6x*2;=X*T zzZ%v+hrH>eNNjJFQ#Q3qW#66A7I_-P?47IG&>PKopHIXah6ZMl173EZ*}Ww{D-xH* zpU^Dcdz6N0CqH_DwG;5~>47!Tc5~;1heMQ$(+tJUf9Feob0&clD6mdU&i2Rm>$BH) zNh0%gE3#o{D}Y4Hw_vfXlaH{kvCkwQlMP&ABl(W#=$Q{8R;Be&;GMRNxbj~R$pBPf zMqh{G+qa;$hbw*gU&2B@!!f3mpkSUiN_gN2jiYh~1$-1Sx$`QcExhfMeg7Ny1krr8 z$+_jz#9ub;kzaY)|4zb3oI$1wjTv-u?U6Q2rs|)pNnj|ZC|SGhKD*9Z`p5U{pT;9R z;IRDWK(me@j>>#9#?<8w7H>Gs;-|%OUFy|+rwDcDTd(0qWW3>=ZV8L!*$GfpJN$58 zs;z~N1>)Ybsr4F~UO!&NH?U-F zkknPY@j9a)9GH>bc_h;~)ChPGf_?QQHhV^7x|ty>ziB4JJEmoG6^Z>WtJ}ElGKncO 
zg39v-wes9SMN8)PJ?O1!tM`_`;c?B;f~yDvOq5qKf#-`p9oTUu5OBAqHXkm?2B_9h^`qyW_I-8|v->Gwwq#8$|i@rTm10IPp%C z9`F-!!+w(_E-k_|i>O&U$o-6xXZ@kmr6;!6;wpdW`vMKk&SjQZFx(c)8Q<&<`oB{CzkgdH{>B2zH({CHAL@7p zhq)NzfvdkaM2BGgWw35#rL9*pXc~T6?$dmQ!TKcXj>9C1va=tBIdse}>F@S>n^u94 z^xO}8Ze%io)PDmJsE>|yS_KAVdk@;>dBd?i_djiY;w-C6aXKz6#V)n zo(fgDw}D0Nm+_xnFS<}PEKr0dDv(^|VVAd~`NROe@6%ab?-qm`%+8H5Mgcqj=>7JI zxd#sM7O0tc39yL4SCc*%Htmx7wsp+z_$eGq2kYbx`{lz|L#Z0zy5Buy(chwT+7f>egkpapx)1)e~TN@p7L3A{Bp%m z0X(F2FBYI9z2iqy7(s2B5F^!sHd!Mvb+raUYFrG<+2NryI@sQhBfc1NCJl&gR4%Oe zSMB;Fyo@=cb~a0(3u*4tEye_dKG*(tgC8#xMQUbO#TI8OIAw|q#kON?aZ_5iIIEj5 zhyKI|7c9VbC1J9VEny{bXzq=w3~?6=zwD1UVfgeFpN{rp^B;4DobB-6KLyGIY_OVG zMFXx^KQ2a{t|)POrK2T0AfD7Q$lCg;h`iQ4>FT_{y{>2$PJsUXO>EM~SpQuqn@gj~ zNeT{y1J1hq6Q2az8Q=#w?q8K@R18a|eT0&aotH>ypjFcxKIVxEi(1@p#z=cIg#1wD zE_|l_hTok^?NsO!8;z)o$oz9Rl?P`tsPRx1Okq&^wIA&ZuG}xY2~nfq=a_0$3VrJP z@2e&d0F?^s?+1}n8NFkru%G1z+Kxw$fvwHZd&z1&OPH(jickEYM&JG;eh&hqGZ~(d z9^|UQ$>OYdVJn&tB!VA7lXg%J91O1%VVzz>r3USHb%@A+|I%t~ffBR6CE^Ne;ixCL zt+#JS(^=4f=nKZfCLUL`Ducn_g^^;ObY0U=|N9W~!H?c}DwUiB8f8Hug0(!RmbC?E zqE+v1&C0m>Jvw_RvvG}D3qNgQB3f36^FAu!1@Va=8F-K({*@_9!hlU9Th3%YIY+>C znb>QKBbF31`Vvoe!r>3n`E)`krvEYo`419>EqR5+p%i;JxEr{WEt1Eu-V_WT9CrJz zhNRP(f61kP_b~IgzZ28tKpD`S#Wrf{JCuYS%_l$3-=-)W^!nPZbNn_#<}cCVciOny zt77$=Ei3Aq?&O>+lW@rs`1m6Nx@>DAEFn88Hvk6ZQPPAg$8^o0c?%t&Nyw~Qqk|^0 z%XGL<%b7zUN){Px1Fwqbr(*mD_8a*#dS$}< zR| zN{b40_%0zmcIx}*c2f5HiYfEmnjfwf)S{4aYcdSC_QE(o;)j3NEZ6;bZKa%qNT!rv z+wsm({ulrWZ82;$d3!IYqb#)RS#~1Rt_m4c$v{{aMtmMld7V?leJ!ZRqV>oG=njHq zx1OeAP9wU0Ab6EW0;l@_8jDi&lQf(AwN$&i`2#e&)q@a*Llh<@q+@4T57ym05xb6Y zgRxTsnN-;wBFf{!$Hb{Hzdd%>S|x5fliAVDjD6G|6g)y4YB2ZY<*)K>laX{Ao!jBs zFAjv%mqM>$ngonj0@sE&R+lti6P~P_tiWf2Y2Pk3yT|(PQpXkE((`_y-~Z=Q2ynh) zU(i;Q*7#}UDDNx!;#FL6S7oaLF*0?-b!0b*9f1-=lQ62$reD@4U(u61)=3%BMK1FV zZcoWZzLbR*9ZwhhzB!shCSRtrT`2qG7-gq}ek&q>Fx_06;C57&zRA?f-Ogb-J}o%W z{uE8@nxb)U<*i^8)I4_5SnjF&1583UIJUrk7Cboq4OV*o?vUWaS;Oy8_E2x;uUgsX z`Ceho3bM<6KEn?QEn5ptBv=7QBzgfMVP=O$ZPNS+p9d2c3>j&~>P6gFSFXB(frBHJ 
z(eWP`lV}|!RGh^yHD8r#zU<1s>Vhm5USz>p5jz22adwvj&@~#mnu>rAd@j%p7*3!c zx+*y;;*i@IAA5+6Nxj=c0D$CgCntdG0M5k^I56 zm*k}KXY*ZE5xVHPA)k1M0iWi$(EA9xSwB-MyM0=jaig;Eqxr}FYmvBxfvC^wBwnCP zm(i5o0+&iicBcnO_AinJPAu6bNdT1NyJn=A#$l>EjbBA+08~y)){YdyxJ`Cp{uu}k#Zh=2QZtYqfi{sm#Nt(fdUwui1)G!A*k*c zS;?P6j48xdUwl2gG|5R}r74LtYsq89!(Rsv8!k*sNBK0iCenFWy|7CkTg`&Lm{Fd< zddi8&;r+XW2dw_ogJIcII?m}6;?9{YM$CZL<9l|S_bHdv%YngE@Y58K`=tbbGKOjV zoK@VC4R$dUnA=hn^I@cly zqq^#6Mm&R;TPjp#p<{8r$B<9zYEu#|Zmh{*kg%MtF5)7>;MpKl*=HG)0_^}4xMW!A zDqVD~Cw>tK>*i~g83pXK5yiul5gVJFBN+9|(HS_a(zjckFJI68Qlu+O9+enPF5gbT zQOG1aY@}Sn`LMcflb;6ec9Rj zkX7P31z*IkSbTuQkh;Wq5VMEaFQyCr7!^ctH_2R#p9e*$RdTi?M}Kx?gTv9b89 zB1hlB42hy>bxoa+wX+r^`v-vmEH6j9&fp>FD4fS`6e=eYMA3~;kg`}10%9AUf!tr( zTe+f73>GoRQaNPaz~vM{YVWxmec1<2C**9RwletZXiyT`S zSc*xcoJq0Xvpbxv)?}zsaqX-}k=-$6Fqv%84U%YI8J=b3#Ng*(G2Reg)Nnuv&vdfK*f}EG>K&TR<(Ilv?9SVg1Y9-= zmd#sXL?9WFr`Y5heF@R*ehu;{C>Xb(+wDI;cOyB*V|bI!mXGkWmZYXQ-QC>WuqNS6 zttNi``UvY0Buu*iF8o#g%QfAa)=Lt*;K`*tO?m?(lYxa@`!N2cleu5u5w%L;mR%_` zVG4J7#7_!IvY903&F1}Zr=5dH2M&3(Kg+chDwk?kCV9~MlGpB!$A3^r9^z0D`dGVH zxe5wJ!RJ+#C)Zt=o9iw+|DxOOa!X@9-hLtY6@3`oiYqh3XDp=U;%v+S{gqah53xL7 zKWLD=eKkK}9{%M}1rYa_@yr3%di`83|KRx`=G}s>>4jhuplG_UN4IJ^Uv^ntz$g(j z&&qTr{f07vM1*Z~sFdgx)1p=|q0{)R^W8s+$EzP2z@PY1Ulh>WZ(jr*%dv(z3wdYK z;T9%bekJ2(N2USMjc_r+yrdv&DpoBUD2r%!b`MjmA*{kVKg3hnjywM}e+>V=m%d~E zB5*$Zw>J9~UD&}LT=488@1UFo+Q;j8FqWJbi@$j21zM+}i)7Fk3%;8dzwkazkay_1 zw=epR7Fy3p&?)&|4bcvnls>m#|JCQ+k?>Q9M6+Fn>8GpBWKN+y3lmf@XlGRR0;Yw; zvNu(jA-SNy=kV;PNy44C<&K9j=>=wb|Gc@}1)?kc5065yo9DvJq`|<^u(w;R$)CW) z+IQ>+PqP&N39d)bTx|})chyX?PzH3{Co}xo^SRc}d8b8$*t~(rZukwlZrZL|>*WRmf(e?82~bw3g9nQC`IC^}jE9V+h~` zbvCmYR#B5@oPUk{2wuk~y^$_jw}22Ec{1pax0L|qWFzmA(({E1!{1xlX>wODZx4A> zOhIo_c!pHZXRVbqwO35Qevex8&Jtq(> zKW|R_Wp2w3zaPV9394$?beFdQ;*b1Ev^>zPe z`&16c=rW^Py|b2MqrRVQVDp%e`?&&u-mfs*t#Q0NIEr$C>KW6b9z>~~0pNln=ZwG? 
z7~T*6Ol<=)tTO*06|>dj)h<}vOtotb3>2Wfo{Ck=$)4`8M*nwV2l%?v1F{*uy*V@c z#X*}icj<-+co8|{KR5=_Z_r7vaGkc)UyXx!^6%I43hK%@)O)B{dB5YsZ$p;OA}z`w zvx?##dHW9PF)+PXzih`vH$}_uSAbb`PCU&O)u!#f`z_~#R~AGTW=c>TZzKXiA!Een zGBWNt%;N#s*v~XdY57t&rz?08Xpcu$R-jC9HZ8hB*L6OS??0a242o08XYHu0vbJMP zJ(O2a0GGf#Ud>hdYyFkjqiWuIryu{OeqDus!`;>|2!jwU2?s#iMo<`2U{I&>-cJE7 zl81nYepHu>lz5AG3ADJjU+j--War6KK5nrz#U~%}D`)s1=|an(-@V`oL26xl<>W&e zua!hl-5@eIh;P+p;sjL-CNI2R817Iv-=X>8_mn`-*+nRj8 znfaY-#abCR^G4ir_OnlrEo=%x8FDV?FE;gq9$nc#hJ=27ewKbCK!ic^T+k74K6@^Ng;Aq5r#SfZHrsp1XrHa_!Llq)u1V)ulTF<~{r{8R5@jF}+y-1&Y8a zc+fpuZO8i3{>%&01pS&UOMRx2g76LWS9J=-EF!jJs%D>z)Qx^LeDHyUGicGKCFcFF+M zc(5?L;ClHn$)w4&7!jLuh`$gB#kh{OAz#bSTATKMvyEuO0~sRIAx8onxJrR+11PP+ z7W*yGGIBjjK~cTak+kPwyo4&Tk(PPI3O9{AsQ)8i2egC*zvLl_(@<3%CHTIDqzgNE z`$T^O#N)9a^RJ@j3MFj1d`s=xOJ@$K>PnRY8k{z8`30x5DZ;2@jJ5qhtY6+sxW{76 zCcQiHlc(pIN5B66cZ@KPVsv~}<0)_~ z9kBUMq1UUxvNI+~2l?M-!7l)QnQ0=WyP_~+=BwMPU?sp1d)Ym@((~MW_$qJ!c!2|a zNq~6+;gfP_S5wTE5d`dloGm#uE|%X*f&S-Op%2VZ7Psv?+U7eKm$$9cMX+bM!>d{^ z@VWugVUljl2pAV_7LmZ>;Py#eDz9Wj>SkPUmpM=m=%(w0C`5oXSRy8#iGqMki%SS1 zplLY?@Em{?L1}Zeg)Pw8)1=^*XU&R@cpRR}lhTlrHJ+DjD!cICx0 zENU)F*#s&cKVEKXu{kQft%_sk`Po46A77GE>g{IEKGLTaE}Ve{B3&I%?FA)WVb3y( zFio6XzFx42bLG>C-M7DW+EG?K*V>gqFMM3wf1V+<950av^gg9|+?KVvvZW*cyka{* zhI&d~!}N@6QifI1`B4MX{y=&o`#H>xf&ymv9@VZUS`goGo8lJp{4*{9SW+@Y5H<)m zxG9Djs~*xTiiz1P9_)D!>naLRxk0}M>>IW?>^|m;oPc?7p(vl`GdXm@kci7u^);c% zGn=~dXwTCsd1f%S<+(<=awDvJe$<<>&AR8;$m`;}?uHd<6^qXQ2$8b~cx0B<+Q}SP zN)+Pt`Mf3d{c_f?a+!VcNqr7S)?Y?`+)}@zk|=WS#klT54qPiY0MGlexPFm3^;R%E zA==fH)ZGb!ESP&;jyN)z)as?Yr3;LK3=BH(4%H6-0P>T5P1IBYO0fsKE5qo(vq?w9 zuWe(3@?D^9etbX0vV3{iuHQ} zexEk$oz<3>#eNQkDT6C1RKpD>i!b=vaw6vA1gq6BOmkn9na)o1^5{N=`(o6itQCnk zvg}6AOvLM5k(jtnfB(uT-w(hMdrX2YDje7G3?3dRFazR@2Z_hlsH>)9-LC%a<|G&G zd-CaWOTiNB=5`P<6(DyP?r=M!HA|bCkP>J&B&~B(JeE)YbN^$dMG6TghJXnh^GepA zjVtXabS1>hn{M-f&kmx(!;I#R#B!2)hefuz#!sjpZ?9`V&rCH&1kwsPNJs0@c(xKlw14z1=~t&d%_f{1Y8XOW&hrs zst43B{}P1bI{k4Bmp~v`*LmNz9K 
zKe8>;C#zhkO}=C3CN}7O4p-LIEL#6`C2Hm}kOhq70Ci=ATw+XM0pIkMxt^b(Pc^m# z!9K4zih#UEg8A(BevW^GJsDG5`O*Krt-KJmfUnEBL{%&$uW)f3J1-{>0DmYzqlZ)6 z`F?IqWQjKLeNwZ9=N0D?XHVl(yN6l`7a?NkeoU~BegD|`=|Nk3lHvu|mH@^++v89- zxH*PurcYHkqN?+RnO%Gkf&>^*QKsHVT~H0(+>MH^`UvtUQ>IIkPIuE5SsD1{nZaa# z9c*_^Q$(08kS+Vhx!fqx%cRn#@ZfvMcBG(=_U3r0ZU>ng4p(tWZfwH$P3thxF3t={ z%%lQ#Ul`lfM$+^@@1gXX+bSU5-zoyxgt5Kf5n0ZMP?#vhFT-L!W#Cai0DOEyeV4;3 z2u~isZmxs*??+HH(N15U%op%^1|gq`tlye=0wS-6EgSZUr8!>GXDTq_H7Ib^RXVNm zJQu^_WuNsJpa#(Hwl2qMY~_GnoNWq-EzKQ7D-|M2eYdCGhWXeLi7=|6&fd)XT~(EB zQ?_k0EHb1xt!|y5$^ys*zzULADaq+D@!qVc)S29BSK%k7AOV^D2%0c^S;HOPjPQv9 zWMJElGbOG&2tg$lRAtCDeLFWYiX6;6Q9eRYDcEg&q@2$BJBW6W-X}6o+5fzp;^;tu zJ^8&H>vX=EMqggo+Q!M)ee_8+wyz5->?|eaGv9;P5!pep%vXr&qaEMGv$um(N}QoP>Dy z0Y1+5mPRFvB=t9d>#Jjfi34WT*x4=iD-0EINC$yY|IR4@_Lf(pVRd+-Pyt5rM3hy*^>hH1ms)qxEIVo@T(NI)k8WfE}Y{PQc}K_Ry)|wuoJ(b0bG+1B_jbxRuvrSa!{( zHt@a7_x(AU%lUX;q`$-3wkR$YC6PY6b96IQ20;5P{u*V}Yr`q*X>!ba`M+HZ0NSRy z!Y1vi_uyCNKH~%@Qj_|GL|@6oVKp2@4JYz4OQa`H`7`7HO#+xL(|d=vNYFk;^qT#@j`S}9eXm-;L4^v3!L~SjST5%2l9soMi8dR&Oe(|au%Fj zludvd_KXo*`6aXx@D?Bd^DZ0ob9KxKn~f5w?{g=X4K@%2=&So}AtZT+Iv<~2FEg@B z@MgX?Sndh0IXnh7z&W#dy$t^we|vC2fH)ZAV$ko9ja+qD738|8ayB(I6F6ILj&vES z#cZ^(i!!M19ZDS%?3z8ylzq{GUtnR;>u7AiwD-&Md@x_j)FZ4;UJ`Co58q26zpW^N zHliEi=uAI?9bfKfQpF#aJ8tAfH(yZ~=(bvS4xNB2m8q=V3Vs6EIA|$ZKC|e>PX9P1 zHnH|@txTt6tr%f!&e#_!?Xe1VEiq)VogrEveD1xsr6L!#H0?PJ8VUK*jb3LK$tboS zvi+4SkyEFe?WW;e^d=rX4}w+q^4&o8m3ZTeXEUb#qmQXJY}r;Tr);2^aP$o93rlbW@c?Bn00UptZ~?cs zKiRYYnZX!yE29qXn_+Nr50^5vdu*|Ocobfb$1M{6G_1gnqQPd>mfX{E_ zzJ%^`$m>Nu|Ge=z;X!!(0D3f%ipIq__y~y1X?3iJyVfD%_`Fdt>-7~E!dVc(Y9O}+ zy=8w2Ru$-#>tSF|xOJb>kQ}M&n_)NXKd*lr6P%js$|9spXBsUWBF;@NUy;S3hYcLN zb;JeTV~sfrzA(k)J2TVim|eS`I-JUp0G&sel(2Y#snLSSzQ>-fl%T`08nBSVkY3p5 z3+7Fkw9m{*ab~f-jvAr@Xkxb}L8`aU_PP*8?lR~-4|cKU2Uy_|_wGeJSz&%-{|nwC z$sz}&Sj^$(S;~eE^IsM3Rki<7?AXPnoMp5-a(=n&aks@CGHGErj4g!^&J?AOd& zB8h5yy2+cv4C!d8%Paa+RBIj2u63nI%%~Ib==4iYKDuFfB#O4SY+T||p!B0u*fe~F zQyaeF@%OdqDCIn!FGJ%!J6=BKkQs1ak+$E@M;=WJ?2ydx{v^22)1+u?CT%I|F;vzO 
z$XH=n6h=6{sL{*R{3CTvY3_V{05mPTkiV5c+{gViXC+P)W{*Gpo7=*F8=@^mTmpI7 z^~ug)R4#zt#Q?*>AlcHsA|Ik?%J*Ri?JkiQeG>-O>|1=#++3PDXXWY^a@oU5TNqmI zbsnr)3NG=Mf4x}z&@1r16~W=QVJw;~+z|zC4e(S`-sI=~Bog>&(09HHQ{AYR-29oK zf1BniL3BmXFYA#bL+e%3IA5k3Y1)|3(mFeNyr<*m%UYou3LTtuIC*4d$H>XCw^^Y} zHZgKD$1?g^9^ce{PZ_tSs$zRt)6Qr~*;w{b&rP{64PR@{ZcSR*Uw4vc*cq2Z+*x<^ z>Un%bpK+G?O&4lMr@+}m_H#VGsq5i|l_`UPKruO6&IYwBfdw{Gh`=K$j(|g4n)~@z z*JY{INR3^F8}(QQ?ftFUW?7N|ij5U<%53aY3CPr6!2&KIo2?EHkGbZYf=+ph_u~`n z59BR{NI+{oiz_q~vVCPRTZe}Z*cUB3kGBrky&$#f0E=he@tM{RBO(~uHu4$ZRcw~z zG}mzey2CVDY(f7Az$<0r{``g;b?Z2zSA{#1!+w|~ihg@Ixd4o3$E;*ri%+CPG5qVL z?ve@)5Q^IMei!TBC&LeaU7;`U&QVo(9raQp~#=B+~6`yPp&OOOo2uc9TosxvDnwTHt;R zKA%(ena#E%Y<0NusaPLs`l1K|%k=-b-)yqrBiEwUYnR6k*g7o7#nhCqBwqTE-#=;d z4sr(mLy{2%DGIRxsuMHg&5lr8Q$r8^&iXpr>Fz+60dMH;W@;%3%99Bn{OLoR+t;Xq zG7o$18}v=r+os>+Hy65!#Y7*`f!#bg2~l$H3&L-pbbIijRwzB*cLoa=&y+OFY*|%o zL&c9f{BO_ZA>2Oy?rbGxdK%qfWsl{+L(anDVWT;Fa1XB0@uAc}$1CToC5fJoUpHmw zPS)ZmP2kJc6HDG=C1s{Y&$fd1KczzWL|xrfGMap54!O@(g1@s_Yf?*b-Q(xk3ZRZ~ z54adz^nW|+M53Ra6)oUGp;|eQP3ZP#1D;b37BVIluqSH+0~V1MnjMWU==Liq>^=x1 zTAUAB9&J&MZN*!gTv=`a6ieGZrUK~9C5mg6gFnWvBHyjhXRpoVkRbL89_7or2xak; zx^B1cw_gX(O_z#BQau4{ek0$@7XVe0uvNREehZo=n3bP3V0AC~Ji6{Qo%~HPT0erv zzz1NSc;P zS4;uFU@pTDm;B-)0sY-qGb9JgbtGaj?MXwDRHPoiaQP$Kk)bb+|sS>C;9Fd*CUv zmqMR4{OQ)h>ek|zqvM1=!}+kWHV;_-_1m@ibuK=I*NpGF#*36`^x?-xz=~F9$sM?0 zF{PNRq*+=23*j5hLAK1}!u8y*mzco&ac`v6ZV{&Ta#{Jd`+9%EoX4e9Ft&7_FOphE zR9>~Z`SKGVd2PN}3C?s1cQq80j%pA+KPtBPA)?5-Q}Y%nXIb{QNojJIbc`Kf|LHY& z4d?xeO+At^cu&CfIo(_BE1YD4*@YX0bfgU2bqFXJVvLFw8C(jt*(SRpFV`mZh43-5 z2~2vHgCQqmjxLrl--PFD!DAZf;GAINpum7gvVN3e_&QH5rsR;owl+)`+6g?Mf_^=V zNdR=Qp4d)t6cmnY|C2xPg169s{}CPQ;W=$}oyJT_;9y@IFQ>pIL5#tsy^&3WS-1yR z4t@FtDBrZh?+$iS1`+sFEnb{^5~aR3nuB=C>Y}7SQQx83aa%QJDQ;!xWFPtS@(cnZ zP=gtTs+{@=&lXy9`I$$4w5YaxHCCQFo4+>WBuukat8*w2`5yy(?xGgzd8-OH{cc1W zY+Z%(n)`hf{Yd0M*v@@DIYLoW203-XinZXTb=<&QxV5iXQfsc6B3Ad^S6J+B7nYVm zE`is{#6mkDop97dG+l}T=w)C-I>KFgC<%nsMMSkCua z%1{px+T}ow$>Y+7!s6)qNEE=vFTKHtaSbtBM>+;Se|0^MR}L4}Dg(t7_eAS#2)3zJ 
z9!g^p$ow70>ePE!LBH$nazy%{ej7s;>x25rlya;^BlyrWb4Q}n%|p}sG+}G*9MFDk zcIo=gqr?axKzKJ0o${zhvML?$b|T|_7XFW_(MMIh9qBK>YJaCChJSK(+6DfQdYQI| zVuAhQWdqBa)Lob*RS`iq*uIHkgvW4_apm^yyzhOW(zOZu2n`I8=pKsn4S!3z7#~+NO63E-2YQ{`ioxLCIm>eg@G}`%?qb4XGH&~PiT*HK zjG&mfEC$qlKY)^i+k>4rb8wT&9C(L72t6977rBgfPtp*xqaW&wG&`oy7pF-$7yFs$ zuMNdAAyi*#wJzFr#{GPSoYsx3zxp>H}Lh=%He%BBC} z+x^VRPXt!7ME|V};+b|v?L?oXQgR;jD{u%U0niT>MJ~C@j;F|V&lN`ls`6=(RHE5x zozkz|yO_<}2^~{5wl5|7ahZ}qT!IC@3Rs&h8WTLqHD?X}pd7BmYtQ2uOrXe}>}gZ$ zQ(KxvUfVj<@Q9=N1Y^w3vXsba0SuEbu5q-HJS;_Tc*0uygeD&30fET>ni}>}z|9Il zJu-B|1p=i=96qazzRMt0`o;tV`A?c{+Lkz$5(Bp1c1@Rz;)_#)Q%6zm_5gByRc zU~1`*4U%D|9*}ow^f<0CgI`Tt&mbc<(^wPNy7a*ayuvlF$E~KvP4!O8SQ2BUt1<3} zquodOJpS|qK5c>vi3HBf{6hGJ+(Ln4PHbuy2%9E3I@8#F&VWNS9E*70fLm!JUna1? zlzIZzXy8AEm0kk@D#}p_%x!{p^VuycfpuXky7~Wa?7#%4tV0^DX0RYwX>}mz_D3M6 zRY7W&+hMnNLHSY!Y96d;*KZNc*v>^xwIYOXJH+A9Wq@>g`kyA^8UDLJwxX;h zA4XZ}iSM%Dc&MSAC)$XzU>(IWsnW={lMCtEJD9+oZb#Sp5-iF_Z~^=@oU9e^XC3|1 zY)eL}PdIq0@Z$7>c8-g&lr8YY3E~7iS>aZS#wc61+hEceIrp zg9RWU75}V9?9$sTtCHpV?(TjSkx+(HImHcUbEkv3HFjNbj{$?-5{%x>ey9XFNH0p2 z=7`iIem-g2Gqvb`-c&M-4uASdLR>I{EZA<%c_6kP9%W2d*7j(x z+&S>AT*nUp?My$biu31;F?GHi_ivvb{I3ahl?ZaV(9qtuqnqb?Oh&<0NfOp%0uWk2 z#(}~%926;c|1)8{{?%rCJ%`{Amyll>qQnf2{}@^e#lZ9nU+Q3V5w&={TM1#!q3AJ$ zVZFU_^7Rltz1y*!{=^wdfejuDhP#J#n@zVy-~JA%Wp1ZZgJCPiT_J6@RjUB=oCLVo z2i&(y0`zB*XTuhnf8c+~0mGGI!Mu9aO-VdukHu-8c0d?KD3|2}Wt>;62$o%(vDM9v z@QVg`%d!i&S>_;b{Vg6UfMCywsSL4BAeI?c$*iYt$VP~2`RRtnMx(7&NiF)$+|)%I z>lhwbfV%j%dc1npe}*8_3O=FkT%T2SPf58#2Wn7}|8=GTO$!H9!h2FP9DOj9tT)`T zy~zM3LrrfNz-nbGrT%dw6_6R9ph;lW=Cr9$HG=R1Ie3(!Qk`y=Q{5X|)oKFEmKW%Q@s$s3_M3fL(Falx-BXU9ssz|?Szl8WUVlN>NC zv}Js`c~Z4}W8vf+mwwI~mH(l0{hdK~{PYiJM%!i$Ut5Pw>B76PPQ3C)d^1@~scDX#KRSP~P3oopnngoY z-elj6w)r{z*v3^D{>yPjFN^oRTyOldQ~&^Aml=6=pzVkLg?z?g7@%CPa0KLe-M=#y zVrBE#fxP^jfsL0BO<<1;KKvVX{|X!ssp*1r2!IIzYY8ep*~mNQSE89vrw=Iw^~=Id z17{D8Uh{P`1zFvy5J~G6#i)BC8F`(;VliCl{8-CbnfBQ}emwZti>*S7((8*0x{$Yiiuuvd-{<00+UCaO-=#w&3!x z-xeNVn)(8BD*=x#SY3s7_C<{Z=8q?b0lO5ef|zpl%c*qqefsuN@%~i}Zxf=^Mj@N6 
z(__PlrV^6t_p!1)PO)|70m*ZZ6=ghK8?VE~NQUK_8q7+eruggo3ZJg~s2X*nrf}kG);1FTc5YU@j{#N_iGrmmYuttDIifIG)n3UVPu^Mx3C|b0STF zZHz#cT|P1AwX`OzAYn+{%fR3y13Gn4z2j*hA%?IMcGcbXvrTZh8F~6324}n+Orj+V zioi+|$aaqAZnIp9FnMD!pOh@&d)*CA^aC()j;Mw*jWsBKP3`2;^0rOyCMe`M6KM2n zuX6*WX`~@co9(KRh#+pKW8BZGz8>DoV_yl{cz)rp{9K#MX^$r1a!;lE+A5wYm+aOc z)j*G9H>g=8Sil&yv_3X}ZCtU3-U!)ljO2^mb$68>Efi`r#%~!*+D#a9xBSW*-egUK zwr19a9yaYd_j#mSNSdf>T%4|E&Zkj_J?y{mwei#4l8PT^M-Kay0*bl0+YgpyHsMt$ z<&ZVt}wiAaBhCBI$txY4-ER(#~`9dDvz zClETGYNJiwjjUP$Q*3Vb0BMLRsy_5%ze`EiiM`!*W7JEu+cgM19aF4z>?aZ{{r#A3 zvF~qw>We-gclBt+vRs1cVjE%cJ2FkaZ}31+%pUKrxnR<57 z(v)#a8BCi}plV5iippnUI;{!HU5`$ zRs~@Gn=|d)N?>bX=%pp!i`IP}QWBxv5m9Q>ym10E>*n&Y!dufLhi=|nz30 zsyv%n7T25YW2j9tH`%3v+~BKo4Q5y=MbnodSko*#szoA#&JmMBQ3r8rL)RW7*zGl2 z)9R3Z6wfT=8M{%r{3y=%YiT@Ch&}u?Mlu#DvKZilwKyDBSYY1}N#V}t%k+D0oZKao zFvdeySRmAS55H#yFJ>FbX-t~kP$SPfDd4LbYGl>V<21_t3iymWH*@G}a8h zy*}ue+x*?zzheCp+y!!?s`U^4`Mrp2nE5dlV+)%3p-+7a?g)G|DjXF6T4fgceiC7D z1o>t;e#bkiGL>`)zo+@M$Vi zbkm&&wj_q$6IBLpFNP7*z7iETfWTQrlb|4rX(3-4j_2At#9;Kd#T$ig6Og4rc^_leTD$Pl$*UP}TA|*GBc%c)5s#A*85Pu8h0)4jBQ(vrKmhLcn2C znhfI-B`5ommNbisKvKkzQ`0cT-?W$FCmMVo+sg@9&MKxednUkfzrt}{MKQD?#=Faa z2xKy5()kHp*x8XFI7|;t;dE^@QV9y?J<(&+{WY4FGBLrW`*p4eRl2#J=q$YI%>E1D zdvd1|RXJ?Qxr~8!(PBiHheBD(^52gOA6h)49YA=f2+tqsnUj2J$i{deNWd z8`WZi-(5foc$I6fjaZ)iwK^gszyii}mV)bC9ujd6W!U9PGA;t$H)-2^7$G;GEU!aj zBlEJx8sqLp){$Y8{IoR}Wa$eE1}j=-PW~1HG92zY_+iH1V{$EtC4&7;#wf%EBQI%? 
z3&N)vx(i{SlelHKfNe)58j3W#JUPyu@K(SICg?`Cuk~0`rTZ6rm!;)A#%D!SEwKCS zk*f_u0(4?{lO`n-MFb~6>xr^u1L4@vp}d2?22T-?S-#Gi=xiGj+;xC>Y zoaXdk?REwF>~JMGz*_C7`NwW%Si|r8UUrB7-o3#>C1@Bs@Xw{tEd&S%B)Ee-f`p!e z0}*AxI03#yuv3qf?bzsDvoU&KA51WXxaK~xDe@#j*Db%Ua%h#WVeVqbdjGR zC01z|Zp2D$g6=-fd{aRk-FCTFQGPu?lnlZIwyVqnUb<9~}EI5{zcIOK)oDr$>YVPSmgK9_& z6I;+?43WMDr;pQBg`{m(lUCw>4vS8U8!=A+oa;F?b;n;A0e2aXfC$^6Coe9}38J&&1NH1DD4&HbaTgbp#o6NtvIX|%hfY=d zT7a=7C;ZOETJ1IIpZAI|#~2%a*FM{DE_%GsgPI!kk}S-db91S}xTzNQ6m;zA#ZCJ> zHXUqcoZ>T?=GAJpFPzL`!^sTrrCunR>1(W2&u|R7ne7iYTkQFe%+>F%clXAyq@=0W zQbX1^3tn7N6M?@`J-pVh`fM4f_ud6N`@{WP0EId0$M=dYozt`89}LgA{O>QXm!w1m zPIP6ibVcc_>#w`ZjQQnlkW2zrO~T*4fRQM-W4h&_&f~@N(VF;M`FV=saA#1}5P8B_ zj2CKpU!V29%7lf09P>?Ivb7G8VI(~ALtB^#T=>z2XcC+tk}UwgdSxxa_(M&|w~eOP3-CmDRcRbVR+4w)*++Hpfs zFLf-fQ#LKtthpUqDukzFV?h7%k7q(%+MEFHu9v~s!(xq*v7jLtfO*i*HF1RZzD}gr z{|IQ!f9LCPHdw+0#G1k7&=shMTNud@6|cpu%CcOFZh(Bql(sK0??ZodGEOL;+WUP{ z>wf&gVSFU96c|V#-kn@_rwGA-_lxyxsZltF=a}-p@{7wERI`pEVLgY7$l z%1aiFL+?-hXmBifaF0(vxYH{UG1R|12kh#W!TO60?F0@3GsNCa? 
zjq9mWz%J!8gdvL_amzdM@bpk7WsP>-ph@MwY-1wp+%U*3pl9#uZJB2odwC1BTrWw# zyo;DBQ-e!yP`yRD^3_W)5ym!Qwmg}B|K7%7HeH2P_m~@t2<6!qi0?Jp?{pYgSgi3A zINxc|KEUMys(BJCJQra(CMi>0=qvb>i^*(`n6|SYe<<=J~9r$gksnjI>rqt@Efu zoE#f0a7JW#+=Z&x{nQwE1Pa@1mOEVt=W+~W&b|iGwUk)N5&1-|lzkAje-{wv%ZqW}mpySBjT1ZIAK{ zT>qfTSH>!afyZmbjoWaEBkUDIDLXB#o83>!IE)Xid@D3O_H>UI*VP4Aym2Mx+_YR0Pxt=B2>ron`ksRHG&ZRz$#qCI?)#_4pnh2lxvo|=$qE2E) zw+{Ybc4lJE#wK_X#2t)9SnbM8K0z}0_nzd>$)O;^-rE7kqy;y+XjQo-EBC^Sc zM^Cwmg;cD5@_MZkbG~~gA~-X`=X-TMipt0|#1=LZR&E2ky`R5J7VAJsZn6etFa%zX1HF-t~C2ZS57Wzr?Y+wXd%Mq57xF9*4yIG5DF z&6prwn49ItKZl75wAgq;*0q7Ac`uj4sD>wz{BhLYK=vryN*ovM`sWoK0)l=JdG{}%o1`7iUR3eL!?)QY!IVoNd2>40-eIHZ;k9FLfqf z!y+Y( zFe)H@PsA+V8~8!t_Mp2w^uSrU;)s)LYyHU{G1jZdqELL%$`8#&|pVKSW$!D(qsvbIECXLL6#EF$WjipfE&yv`{fp%${)S=xnBh0G6fu zx2-%Amnr>7hlhSrFDF3bc19s3F)ikzCzhwKRf$#Ua_Zd8?E04;r!O^9?<9bu!bZ3bawQ)L;ZmGRUZP z1*e7r4i5KECMRig>LbmUFfB&jqN3As-s>No~xmEB#n!&0J z0Wuok9@pZo-$xqsZw}KN_4qehlIfA7C0(gF2S3?Pk7-a_^&`vw*&+n zWoqe7ua5e(7UhrohJ%^q46`PhLFBiTD^~`(+{BZ+HVHfkCra9#+tXL)3&QEh7k06= zUh?PLaBm=-)_~cpiwOcA9T%p=P?9not+;p~9o)=_fb>U~0=kCz?)IXl*qonp5yD1B zUTIViRU-WTX5>%&4&QwOGsDC2p4`4rKldr$=pV(4MeTPRa{tw%|C|*kkQY5dxOW8K zJ$h}ze8e8+62s?h*Ft32A*8oVmfEK3mHMXia+olF#oMv@Bkeq-hH`_1-do+yWkYW=tV=< zt#)Mm!r?v>cRAI?P;g5-82snbO2&r6g#`pz^iGa+($i$_SfMmYz&<+gYfHmqrvyDb zpMx^Ja(OhnbsJ*VAtA84W4z%BTC3Ho{js(zDbxNtB5K>e?D29{ZD|-@F`kCB|9IG) zzEku2dtcz_J=0;(lres2b+N)>2l&1*_b2ueAV&u(`yvbaqQ~9I2V3g6Dg!kDraIIm z5fmVH=Fg`50LwF&tCmEsDvJ-Q;CW;#mtwASKpX?nE4w2+b-o78Z-@R8 zE-GleWUE)>m7H}|tslyn}bIwL}KoaD|PGTc)N_hRf8P$!wRK~lP zC(cu6u8MI>0$2I-y0o{um?KYtWNL!y2RJTLqiuWTN_~QT-yMwD3W6#PK7Uyq4L%`3U_gAmE1;-VVW+T?nXu!l97-=qtrGs%+l zDL_tGggiDNtlSPU)yfGJigo!sr$QrFimGHCDMGnB!2Xl{wTK$8!<27Whn3iP8nS4E zIT$Dc>7*Px9}Z`beVMKKNZ1m6CE7 zlr)m-4b9-2mX=X#@o+HonYDdSK^0V(1Kth%Po3AHBirS=__W)c57x={w^o6gI<1kO z5h6dp?d3R))uR8SZ#mAXZ@g7rOKpNK-oEruIRsgrUo#$VZe2+|n{#2C2p_gqjPbpV z-+srN*;u#Aym3PGDckA?3bB?s!)xiz{}`f1>bgr~StjX11LZvD4z(k7bs#fV8~lJB 
zfq`JD*%aT>{j@!TP(e#8McvD_Z+IbWc`x1ru0qtt&K|?C7m_QYPVVI+bPo1XBt7{9 zRr_S6K2e{PBwoeVc=g^3sXP78y`rjVb3A>pR*QlTtJjfofXUy(r+!!@FB+?N7X>O3 zNlC!fQ_hp=OB6n&iH(hOm9_=&487wIf0;QGwwb`1h1ysF%~Ye*3h^nhp&ep~5_ z!ZH>9sBEpJkZYFM$u#lBT2QcA8H0hvSU=ME?0?=eX(C&RN9ZsvC(Cay73|u|g6N^7 zl%-pct*orD?Ys4J!#Z*$>Az0uh%zGc}JfD54hciX3lY0um(5{w3hSAgNU^YF6Z+y+T>6#Bz^ zlDwpvqT`kJ*Tlc=RvCxCj-UrN64`K(pe!20xv=59)K^WmmxQ(yuWloT{rGgtwm;$5 zdaN^+hQRG!hUARWCN(MU!ClmcipMjE3xB`XA6aL1{0ke(~qgZW7^#$ zD&8=!gyhFYmKQ0E-B+;wC#G4lHgo?b!&YkwUz*%JUS%c}f{m=J)H1SBOw2PVABw!M ztd^mpJcFOY*UeueVEoMYRE|_ljVn6Iu)>k#Y z>3Sl*2_ErsN})6RyRTHLC(B%&zpPmQXT8p&OsOjk0t?%byfT!;`dJ93{EU^Kj^lgF zeP?bjF+In&eJw3jhGKQo^I>U=ipiY9t*GL2d|6kqoH{1Cj$$vGE-kxft?h1SwOr#9 z^g`_K6v#2Sp7)!_7ak&?F^_$7d9f<3=)X!QqWtn&33xTOij8oSx}4kTZ|$sF`+r(B z>}$_!CLEoIi(hk}Cpvy3wjp%st5f=Z2W*v4@=~ne*x;bctr`pe;`F9bf{(L$IwgF- zhUd2hMkZ?$a<{h8X?2ZxE?v{2F?svW>p06eolA)H!wH}zni+uUu*L|-092VlaPmP> zma&1)Q#GF3@J0~{N=?)`53m-pJdB6wdm;7W)P5?q^7OR5PXz; z8Ah&xc#%nR-?w-@$1#G-F$*!<$qrP;=nyKtbPqHRq}8ILs_~TR3@e$45Os{QFvd;v z>Fm(fpO!Q^#1;d9E%mD9gx+%hOh3JabQZ9^(J&i_9gI`Em6pC0N zfV|*VgaEboCKVAF*)5JEnMwIV--7I<@5r@*gwdzEibwUaC@IYaL?lC(tlHriT<+pn)NmmgQSPtrTaI-}@Ma+tk!g z-R3kOvMq}3-hQatF0jOE&&Op%*c7j4)0LvIK;k(?NiK0%l!_6x+g163`FiroRs?Eb zu&s8&-MQKGaE8peVpZEP%-3)aR! 
zmU)u)g=`xY4|5o}YV9d1hXj@(N(iy&QGj z3Uh`4@qsRo&BcWn41KRP#8R@>)@Lm#cK>Nsp8YlzyI4F4EhjswrtqpH6z842ToNgawAq+c332G+ zlM!y*2)KN&a-(ROfi|+xOx#vk?|?My$wsn&*LLmbd78tHv0cYq$%dUu7_GjSX$e6p znS|KQ$u}KH8EM4n)0eGJeTi`16qf%YFrLAzX$WWT*N#rh9J@Ey_xmJE>|3qQj7#TRPOIJHbmqKVTme>Oveb2W%Afj+ z?={YMQ8EUGndVYUfgb%0qCUz}*3Ls#OoD^8G_{L!sH2Rvjg}2^NIUj1bhnV&Vt-UBEc#F+qkET z3>*KB4+FVAzi)w`=9ghVYm#@2U8?D zdN3->=hTr~*5v#8QV-ADek3x}r$JN#OC<_9W}q<4duWV)W8@({4BT^Fr>B}c zBP*&+PIwb*ydU=T73w5Opxpjp+>;B?b(#=;Nc9s*OQlLWQ@woe`Dph zG;BjW-#C=wbp>yIuHrz2@>L_ld%sH%tY3PnKoXDB#UgBASppFgGfx!RtNzb zKKmG0Um!s~CLJP}*5@7ZBk*|Rzn19WBkZ>L&HwMQGFY+Tc|G|R6)k0eAqhRHQ))qj zr0}4AV!x8@7l_^DYo^gY92&6(r-?Gkf`P-S;m3WY;V}2{3dKel9ee9%ms^WzzisEo zHssY7q5vBBZy^;@v3C_vN&f25Dwdq#E$(KD?i-AjANtHSJj7np5nT3lvfW!%qB5kX za15&-C8dwzqmhwUl0Q{PHnXM3Ai4-f3MdcZgzQBw4Ld*I)^*Z>$`y=fg~cca#Q8?$ zndWTYG*JtyRcU^PPTJ>?5Goch1ENZv(Z1hQZE{L+gvr;kClkWXHjhTYtSq>i2~dc> z^Hjaf61g5LFh<93oKW&YJl?zc{%c3Q0uxS1jHu8os8(Jzxo`C$kE$QL)rgn(nfQ!i z(2JOlhhsbI*)ROIfSOnHx%B?-AS)>+CBJX(H|W(I0CY4Wou8j)hzX|_ zE{8NUL)T3dBA;}8(-@4IxR{~3Oxm&7{kLs-&f8FIUuXHFY*1#yQl)0SFL>B3D>BRE zDT&79XD7)dMI8wFCoF)F*ck0%Gc8hbOb5e&oc|U8i<`YaTD2T_Z3VP$>t03vZ4vPn zJHU=FBK2V-rM3K`(1(Odk=m{L3HJD#JfK3f!s>PlY4yAiPm4KbX9*N z0&2sgVm)Q!j}?=w$#3cKL76-K6W<1gHCJ zn6Tp0LJXKB@&a~Lal*7uj)w=-@gfM1YjsjkafEX zBut+HAUNoMYcRTr;s{G-h~h$jPUFfT;SqQhf&v$%Y?4&)wRI9eidm7m;U9kE$@`x{ zAF(?xyL{Y`4z|m#dS+5Cks7xNjoYl2pc?DsdYPcbv$J>+O_WJ2C*0dbbQ-W z-*+!Iwyk<@^?I%N5zG3Jd$c)|GkAy6!TkHm)u`XN+G4c<(qHb|Y7` z)nRE5aNLrzaS%7aE=cCTkX%s9$kgC+pCcy=N$e8!(zRuLHTcFf(+PbHFICD_M|3i&nF^KCQ} z#9c7FWojcL1m{HYupbc$CB=#s%*7G)Vy`c)eJ7iV^WpWk-fJ5U>1zodXFy4YVl>#V zirU0mdq0RBjIY*s2(&2&qd$0QL}QR7TRL;5oqg%e)_dR(XZq({d58Vwx@&AuzaD|< z(-{zDAs`S##@JL6@&1S2I6BwTp}L^yU-{>&ZPuiZ?fSo7K_szBOb`g>#pBL6*JfZ1 za6*ju>kM2SmQEaX*wAfNbMuz=q7pHE?N_7iL`D7wDv{P79gnZqUI>T~OPkVKEY1u2 z?%oD|HOH6;=cQF8z<3myF9g%k*efIVcJ=K@m2fx8=qTW!Ur#fsi~ZE>Td{*3t?xPnizs&0W^paQ)GS)y-b1tc#m+S z_ZCAF^OvsQsBv#Sjp@@r^<}1Jg@iH2kmJcb26+- 
z-5J4Kj*=>Y^~VT=ANlbM@g|k93OcpY1j~7G@q$7d|HTV_zN)$&|DA=tJ!}Lq2KYvz zPZID{#J{}`F>ODx{6a^=PmpkbA9me)p8ZCIz4p9+kAhC*5b0<=^7E`OzfPF^<-ZhS zlyHNDD~Ba*1+b21d=JT5sfjfR)n zPv0ioFZ77u;C?2&12gv!gq9L$=->9j$BW&8ZxT#@&qI&4U+unwW!58%@LfE7vTOHL z61}+#Y1)sMwFv@k%J`)it1Bt&q!4NH)TuHUA$iWSPJK79-yDC07Lp$rdQ|+loZm{Xor`WLJBW&?kZ`tcFf8ZU*LsvS(k5HJ# zsIP@ez22tH*hY}>d7gScC35o9_Ta&TwF=|7agE<$u}&|V5eYD;8%awPM5@um6!V_( zR#Y$ro)k8vlcBGMnEkih^{`#{z>C(qS9iu<1Jk|a>|etiHJkwMqZKu$FO!yvyY*n1mpZ0;7@;pMbED zig1y^wP|83>De}M!b}@;$rawlZdl=_OqpcEHrv7W8oeV-hgb#-F4v-&YN$Y9Qm>YU zL?st86D$A#KmbWZK~(fM-~4e4H%0p^un047oXH}LEg=-}qhsqW2a|ag^c^QSwkf3I#fbH+5U_u{k9 zvVt74FV--1F-9w!>+(zDi?tL2qF8NKoDhOZKGlb(Ooc(KEL~b|zuNZz>)$O?!-9_J zE;akvjm$xRFlJHcUeJr@%(UJ2KEU?hb5~1k zD95CXU;^`r0{OTXIiE|KtJseB7rKgTk+5kW){JMMm{y*_r54dUGxqYDcI+0IohAGBInm8~})VI9)&HF0m}pvd@%Wfpq(@TBj#RIROF6>(b;HRU!L17o&KWp@%6e30b!E7sWmfi z-t1X+!@Um>mnsMDS!(m<&#?O+eR&z+On@ThI!b(+G8jC^((L1UC)1E{rmQ|Dddd*`$I3;^%tFF`3YrA-Kw?s z-g)0HyyYqU2fABH%*220*=I8c90pQe>OKlG*=zJyT_J_~YZ!A@k-58>ZTMU9eEG{D z2GB*{d+!6g?>`?heKyA}2Vani_@(#5n11wvV){q_`V>3uw9{>P&unWfD8wK4*gq<&m{nj?eiR2QA$gfhN}5a>6k*aJ9JRr5H|ts zBk;b!GX28HUjE}6^!@I3p@sG)R@(SU6YRCA(uNEhY)Q3Ccsqm@ zzDW})i#;V!G%Dc={iPs4)Dda%u6rM`@sQrZB#HWV@-#c@H|N`)OkYf>A+QoRN%>m7 zu2_}v)mjRH)#JE7@a2VwK5S1XlH-g=y^7V9N){}2c}Sc8tq$fjVzOiO(EGHikz#R1 zOci_VmMd-FUAD0UcN}WvGe5PzUw5}ZJ25@O3TMy3ALj2&3mn3D+9IAICXS<-#4}=Sm_&x>KJYW)a%x z@2uBlSC%Ku_K$ zTfMRRlZL}Zx0Fb?LmpX*z07(Q| zn2S&=39Yto=OpV^(9QbyEU<3*IhMirzUF3^0iX=D-+6Oq+h<>XVU;CIEHjNDHxTN2 z%wlr()6}qf#?m#XKMCz$T5qYk5 z8}Hg9?@zRT{rlO}$&+o%?RK+64%nL^0OIT88zbRcIpsBe?`#+5APbs`7)R(CVLM`e z8v2cb{3P-YDkBCDWJ*~+vEp;ExRJGvM%R*7SaA4u!Z$W^@>FIt;OVa@ff8JJu6m*b zaSc;Q)0cOD5X`;GR#}MgN4?t}E`Vzx3aZ_D2(#^@R~1y7#C&RcnZ8XANL(T3>W zR%zO5RU+SxR6O=)&6;EP-Srmtv=8zbTYHQeX;}=p)G^Jl9{+{lPZFy(Hm2%I35OcE zE;TjTX3dyM_Q!KM!h7+ZCHma+ubXXy-sD^;BN`)Vq$S)$P>a4+yITd8XR*0StfDnqti$uc2pF;-$Be@jNToL^@aG0b$`vEkY{}O zZaOF^1f4(7iB@df|E!AVBZwJ8HlKd+wJ}o}(|iG@`N_7+VP{zXg3csGDj`g=rtZnY z)Z9{!b0cMAflJpO|d;+ 
z39%`O>b#f`1G&H)DxB2`#zU)$MOblTrfA&XixU>KD5+6RB@zl^_ck3DeSmbt7<|O| z7ZB}hzy0 zabMco?~S!bpMIGvkKGBTl1k>f|M>3_lE&<@-DZ|lRS14*01I=x;~|mW)i%zy8_!cPGjq0fQLfr#=ZY z(1Vq9bAN#!yb#k?KUc;4JjKR$}JUXA4C@_!Rr7QU9o>p%lbqcys zy)Cyn%3&qsX)Il2ufO&x*GX0h*Y3XK7S@3{{q^J#NI;YK0UH-+{IaGh<)}H*dT6Y~Y{?RY>(C4pjW9i<9nZOg|N2+dv>*b zNAF_4JL^Ppji-QAwF6_|i~@;G&|q4W5uGswW)&jwKI@!O8_D z-_|pO*WENEiEK3ZB5_~C{x+E5< zX^!LJGL~waN?5m~i4oq1bK$(&<8bG_Lv+J%cYOKFhCqv+-Zl-@TvClGf|U&OcP4t+ zk^Z1TWZ(Pbwa4w;GtacibD3r&aaAh3By~)iNipIiuhS;s*5~QQx;KPY`LdbtYHz;# zv3uygx(%@bogt9y*O84(ghthR$8*9ajUz&99bb8gG=H7V$-zUL;<4Bv5-mG} zpNKEkLI^7j zBeGu=FA~$DH5g1c~|_+W|n1=x|L^v=P8)w6(O&R zu&g!e8;zuog^6eIyc&*MMtkPSZ|(P%5NN*$Ejv96!cIA0-~EY}*O|CJ#XhaIhJGN% zmZ~tr2oCN@z>yS1)xi8N5x{fA!{DVg`&c&uh%*z8>7DhjoPX6!C9C7z^nOaGT|*?g zYKGam_wHwBpL(p_{>Rg79#he(%9jwAevm!;=w0^QORrjaa!2k*AP1%rL{ywd`c`W! z#KZ{1gL7me7Lvw9rbgBiL6I?WOU5#fayHe#-!CAL10B8AZc*H{j^e85RydD~9bCPCu zBvjZNW5cv$`GVAyEw#gsJKy?~;#UE9>PZW&+B}A&^K3mAJe4v8OqGbJj=)5JyZI(> zdY7I9!LjLf{)Ly?wwnwgI^QDjS*<~k!^zPt2fAP1Ln!98$5;aUUU&})gbiFHflcqB zuctPvZ#3Y%*#Lef@XoTFC@7I?ikDwD1lpYO2ZRD2#QZ^-!5eOf=5vXCHEx`3f+qdk zzwfe5H{ZgZAWPrEA_o11a2YvR0AiRY1PA$qY#QhEFsgL0z{AsR`p0kEo`;-lsqOM? 
zG2^Iv?Y%csnQ&ea+*O5sPH6r&GPX41%d0;sEg5~w?dW`by*5Gsf+y`z%pcMQZi z?`mJZ`yA<4Cm{D?%7yWZ0gEt`_YQ_a{ff+oAW=`7-10yIo|oVGggT75-AUFyyPYjA zA#5e(mRoPK>9dLDo-RQkXow-CYtT0t8KVx`Prq{F&2tnqM$BLDCmk+@c5@nrFXqB= zwk1TX1~CG4D_?M5Bfm=K^(Uc89)0lscFb;Dk`QOHRV-cN?DOV(9!e23$Njliqzb ziryz&3XO8Oz##E6NJQHZ9P2ggn9R65->;FO7C|^saE5V;NXc*y8zy;Ts#PpnY%jg~ zrav{6tfVS?5?QZ3(+?Ga&|i-4q6Z4U6d27jG}3nAr}Y zNeMct(XZtiwgZN>;Wu`u#1}sn0$^X;7EsHJBZPOz6g%mplkBWtA8sF$c68$nM_Tdh zadzOZ_OPwD-Old4?|yszm3Ig;Dq^Z&A)4wGM(<=w712|C@v@Qy_VmLKSl>-|CsMfC zfIi**`uz?)MmaJ>Z$V0}SD)0hWa^V9P4fD*NKsM`imU({0*J;4{a}2t)0aPp9@tHlA7bpqj{A?>SGh^kJ7U?m#+w%d&IdrHbMH^)LwkA44bw;%`) z#q_1g;ily=?(5NBlW@?hfOw~6CfnDakFyglx!v+}voKc^iEhbaW_p!ckM2Ee%J}!~ zfybY<(graPg~^3Ldq;#YL&y^e;!^LkX{6PQbSJu1LYKHs$3{bJSWS%-IahB|FBD04 z-orvn$9oryUxH0F+2Yc3a&52OnPEfGJo5amU2WQ@Z`!QcGhuuZ;CX*lpgN@?wqj9j zf|*(@4PH?cm0+j=#@{hF$9C9-l)=S|i7Sv~xk^ei=OZgzFi&^!&yRa+7*?s}r!TNN zMNf-DP?;psh~@mvr=Qv3=ls>W z7Id(}B|!*$@Rn<>FFqE@(5`ecGA3jFNM)VIbk8)BK&7RnS!!lG(g|n!`BaXpL>A9f zXkQv>q?6M#xklIKA;%CS1%cF4_oARVet-*tyXd@N5--1O2uSd4!y*)D(YYC;B=neB zyBz!LpZ?%o>n1{&b|Ke&mx2QO^2<-`_~VZC_9%Ll9R5nbnPLy#b*uFQnH%e?2)9{h zpMCO?ef7yZI0uS3CwZLnabJCIN1pKq+ji72f1)~w3N+OFwePIPB4X+h;_H^^nASSp zQb1UZD&v=|sStpW%cDJ0%tKSZQVB>y9``yl@8mSrcvxH60*EmcF0yq_UL=n)*&Z!Lru{ut*Wy;o-0Nqh5(sIGs963-cZBwH0EEI61V;8zx>@F zCSpEw-h4Y~?_F)&Hxuj|m`9JU-R#<{&av&70=AP(ycII>Qi1mgSQ?x|47-kXGJ=fa z!l$DV%W1C$vwcbmVO)u4ubBQw+0Lxh4*iFP7?>Ck=V1D>iXdJfU?6DJRgvx$21}_X z!e2rxxxh>U8`W>n&HIj6vNH?d5UW&||LEdEh}sp=9Pj%GCv}YR=XS6mLpL>h^#yV! 
zpnao-8v(^W`|KmzYQ%mBa3L{~ggTcUoI0s}-(7spJkOJ}qXIE8kq)Q1I{WaQS8S)F zF1Afa4u^~-+Srf3w13=kyB&8Z8AL0vY>GkZI{@EgzH?nwmA&=u2jOAP+072xXK(9O zfKwmCDM?r&lo7E(LITBOm^jl%I0T@j#f5g`bvO9AR5CHnE1qkIA9<|hw9jCQ<=2*o zl|wcpK}SNRo-YAJJ|1-`|^|CSJ0d$&bkrJxz< zpdb9_12%C^nVUIlYU-`~z!7%To?Ba%3_?P1&?~}?pa(5W{50cKL|Q^Z3Z~yqIpKU1 z(z>X)*edW3=-3VuKYc(Xx)y?;gaDijk*$M)p!3_~&p&Jbd~2Nb@7>*|%$P$=mM!fM zmtSnJJol)be)%nAVhi2+qRXzb{tw(`ePDXYjQ7K%A@dM;UPyED6+1u~_ZH2XXxHBN zw54Tta>2HYvE`k1`IYUm~>LaK=x!$|8b!pgCf!iWNW`<=23wyV=H@@thZEej);vKC0O( zuf1ve?X!pV!ipk+PQwF9t}XcK|G7~8qAJSxry-eIkAhfllfU}R@15`k@k+uhV@5DL z!6XqT5H9p>8|424J0**O6)%}@|MvI@f?U8;CCt&* zM9ewmmBn`ZYoDP+b+Q?=7y0#Xy!pC4{Lp=Df4`DJ)e9GaF9c@#&(n|advKWwF8tjU zcHy~4;DoA-fx*^_Lrr~cxhD#1PalrRko~52q5F3%sKUb8_Z&H;lx3#}w5i=LzmN~z?JV?H}Dl+v)%syJLSVR2CS_J_K zc;IdVX4wP~aN>l|?CRT}ZF~axw9T?tA}LXArb! zupPM99z^oH#on1Q+xqnBZ(ojm!ybP889V(q$5@BN5}sQNokm0RFswkyu+| zf4${CZ$q@`GI(3tX_O=y4NyqiBy*q|564z(TlOXwn6ZR8!Izap5)Q>@#>Bu7F?>JK zRtx7hEW{9LA+CBba_#fpz-rjA&8!P&=9ZxS0)y_|yV|SIJ#L?#@LTJ)&0w@*IUe#| z_yvu6HCUMA2wJy>At}1HCADEuP(4&{Raqqv@1r!af%Bp9{+(MBcZO7S=G&s1QWqNUVhmSXk*3?0=PLZAcs&w(kfjGM3Uz{ zM?jQb{fF9FV+PpKM;~MV_x}5iTpxZo)*gG}=~Y7Sh!al6IdBWxafj_~;E-PGN^pHF z35vu;hhG-?gow?^*u2Hz<%$Tmev#&{)6vvEb(oez0L$GP-(Bk<;8YriCNyIltorup z>u+9UXf(+E!V3FfYVqq%Wl5)F2A-@rw%r0+B7@l*XP(SJ8VXfo+2yg z(aWy9<_g>4*vnwlWke6mw?ABTj%~H&=C;!Y-O=8Qs9SxG-T<}MtOYT-b|kLZeLb|_ zMcr)(arno7{k1JAF0~3W@?;_?$OoYv0en#5-H}f%Q-6{M>aB#?d%YwmLQX=v4_@=> z=@ME33_9d=;9X>1*28&h1RGV=N+p7}TNW^jyZ<}dzP1`+{2jY=w@a=#!%n;GCL#}Z z@r-?X^v$>4Y@3d_-nz99nd(v$p^)zvOyb|&IEg`tynh3Ubn1A&)XYTt;KPY_*DDho z{>5e0R#DbqoA0%)_3qil>PcASL=ar5^_0y}E*uQsdjCUzPPqPCi>&Au6ij#IL3#I*k&w}El@DVAzR|U+!f>(b5k zAu5?l3C>D?`O9BHq6Si1)>$b|P}2M}(n%14blnNRfV}pEujE?Qr{F#fnAvbsVo%Tc z;fN;&KX0C#enL_7J{1viom-KhhwI|YwF?67@eTuM)bJ02YBxgEsz@vHIq$V!*{5Gl zus%C)!E`I4(-G(8GVbZETuag57UVxDT4 zGupWNN0KvVrr|-1pMCjFyXEn*)|ZsN3yT#1*zB}p_aznp4676uR|xg?+wNpzj(OXz 
zz5g}q*Qv|-m@ZJpAy&~%~bbPdf+5HD}Q=O#V7vyI$xFMILs=dCnZ;V~;+*qn&unK?wX}dexRrm}FO4R`wlRG*|*uJq$b<^L-M217%AJ?e2d*>Q~fP zmRfFJK8(M_j@fTF%gxYqJEmkZ-ctiCG;<;o|BBhrnu~7#kL7jlOgQHd@o}RK)+cyV zNSCW23m24R$&f}3T~#V(Nm+8-gzzzRlWj-rj{g8d77fg<&}I52AR-t$>L8>VB4{SAekn{>#&Y@qrsp@_ zc*hTBf?K*;W$=0qv_CRqB}C0DDVk${y8a&9V7-A> zM^wC2xHuDF@W9S<)4mx0g2g4)vv)sx@#)9x-#hGNCmy~(iQft#)?&bDryBc)*jrXN zTaO$7X3sl2=-i`^6@L4cz5MoAJK^B{EV*VjZN>r3$ENXv5p$K-JS&;Tlc}$!rqMd~8st3n6Mnb*2|?t(-Y0@T5INikHf$1iP_g_xSEc7{#+G?!lo60IG~ zu#y=Q<<_fbcYEggbM04W|IPZbJ$=?}9E-c#1;0Dfaxu&AyZ1gsw=2e_gXz3DV}57h z{q;R%8%QPnZGBCd{gaf)cRl_JPI>uw^Oss;$qelG+uGKfY-*V>@&=f`97P-P)=0wP z&XOwZv(G>F$8;cu!_>k;+issDh;rH+Gk>|)pTM%nCgOt87CM%5P&;qRpsiJNiY(!kPz?1y-gx#Qd+>n=Y-u%ki#4hqJd{kJxcH4QZCL=KpQfc| zv`euMKmN>)zvwomNadipS2g8@*mR@eP*UN&7_*QW=Qxc|6YKu=JMVBnck5PQGiFY= zEq2(`_TOt4#-$m1k2FF0kDT#)^&eu7+%N_|l~NqAsx7ZmzGwCOV!|{_O3y}7gISAe z<7BIMWF*(N+EL*>?X&xiHs-i}7;2dV^Y3g67A#^cJlB4E!jX38J@;B6NxqWW)>e2p%=38tBAGv_b1?u^&VQ|Okf&cX}6 zo5=#;!I5YrQ1pO1jZ2r>%dfnuM&noy!=@a1Ai_sGtVfz@pdlHK>3aPHyD&_`QJv7c z^79(u%*nJV0rR{9H6gOP&p`@Sf#x4s9&tjHBccKhCG!oF!G{F!D3w_WX$@f-plVi0 z#IaLKm1YGfV48wJr8F?h0E+BBE40Dw2*6p&oA%=6mk)t9W&Dl;j+|tb0yQ-uMB`uy zGWpSW#J?4%zYzpXjCCeCsl@Ob=;7UriB*n$Ng$u1`^n>$6}3TlbZsJ?g)up4;v$!l z(St;*PU2*nWvpl;`uz_CjWqsbr2gK$dwT_uwjXEy5kS}OwKO3@^uVwntKgpzBx;}i z4)B*wCDm>*!%-bOcd?5uzuYeR%PscpoFd3#x+N1;FOBo*ndz2}7MG6Tm`Z?_+S)Sv z?@KS+=6n9y>qx-@){R*#iwJIW?KRgjWfi6jclBUb^oL{!aYCxB18>=%?tR^Q_vnts zCFW^1=A1Fs7vI3dno@Wip8CRkw|v2vEW>u+Z7(}y$Mp&1GY7wckZ0nrH{W3k%3+9# zyuZ$rdpOJ%zd4X$%*n zXJq3f&wFN~RbsKwkioopbM3D;-fO2{cAIr1RPlmxECiB(D2MI4yJaCLHNcD-VZ72n z)e)Uf_!X_QaBbhIfIgf;74sZv$ccFCi>NdroJeEMEwC|XoMc`yTD;W$`{Jv1 zB#``B?eMm@6u&<{r zv|FC~#5y9Jl`NV8Allo=_10qu0QyE#!_-S?AIv#PehVC|7Y!601%`Dveu}*(5=~Nc z2^UlFMzccD^qDOn3K)ixAh;aS7YQPSgj!K(j&Pqx-2=x}gG%^vkZEp{)1HQO8lcm< zLjs8W4Ky885&QHV8?h|tSZjY%Z%lmA>JSj0Y{LRZfTW&U6l9U1Q%EPIN&qnv36&Y; z#ui2isah5VtJ1}Co{7#=UAUiPLiAKxo^4pmbtFwLt1Q$v8zs(>{+YSVXBN0 z_UK&PxvvGl)`3mof~-(Vby|ywfBg+c+CA4?V#l9%m95vmmrb98=QK`Zx7>Wa|E@p? 
z*mBd6)}Cm384QfpqJ4?6&Vd0x^~_WD@N;iC)Uwf#b35nT^r;hV|NRfPgZA485d)&@ z#1$OG>pO)It&3*Qwd2paJU|*V|Ew8P?Tj%O+xDA{AT%&ZTht+16lq_IJ&>K-(Z-y0 zhCTZFuhAUKt!J;^_8j4JNAJ3;9ku_CmWl>fBcX->0ZC|o3J-kL5r^8VZ#`?zzBAtx z5Nk#u9_56JJ@Gfk+g6)zZ72Ws80&_=O9qaH8H|@#RxD)-T&;ca)mQfW3$OOH!v&qP zZAn?J2aTCFWfD`={$az048$;i#<(0;P|HIvYw-fDw%M}gXj`n3Sco-=)}==u`{(@+ z+8O6vWY53(u?^_m)fN>MS$7;HU;WPu{_EYnyY02duD124tt>CE1B!=e5v_&}lo8qU zO$4%MpL^aW&kGK9eY$ldG(ffWCScAKnEuJ9o@u9@e4=F`cqCz6kw7P==^=~Rmb2tX zWa3lljIcCe@tfMiT+O^BFEX<89#Q~wV!c+?JwGIM-Fg%9zPFMm0>%OR~aKW-=6`jHb2@qV4 zeczJkLZyHI{!JeKkzksa|!Bq}`uKA7D zo?(Ewb7$FQF#hi-ma#Hp{05WpIjmGTZA!z}K2 z;31oiV|ULU-E1~Y-)y8Ef7GFtlduSl3n5oRvEzS))G!I|ZN%Yj!wojDKacs1U2yH4 zSPKdmDaIM_gx}f5BcHWlXt@cb0v0o7Mn4R^8lRfZcI$QbSRMH@{`1~6>t4_i2fYIN zWrMvp_W%6%UGLThr$cGeIWA-Fw^hcE`OB+C_g2Ln+zqDs3Qrz8rsz8FS{?EqC7Qzwc^kZ>9w*#BOC( zwH0GADK24pSby+1tCX`*a-Ko=nJ<9!AedH3!2?XgFmCY|W*D}~!hr<`G%ZMK&+BvTH=4*LVN%TJfBszi*xlIb;30H5W@_r>tzOn+4nT)U17S2Vn& z`Ojk1i9UW0ql9RmaP(m?j-GbGC70M2lY=LAzurCEe6BF4N`g{ePUcCpemU^f5zoI0 zi$PgMr7fH{%K>%4`RCiOk2~55Ix@OmQ(+hh;EgbT%($6pjrQ&vW9_0FAG7ra51=73 z>CUyg9=_cA_UKGBzS;2nkbEbCx-%B66F3@RX2)WXk!i;q`YZeAt#{hgrL{I-=ujK~ z%{O+(9rxKEFa4c$&=?v+3KF7I*sf#xUiV&u?cO`@w|gJ@mtF9u+q`PcJm}Y_2SNa8 zeW|~urkbdHMEhfwO&UWO)g$Vl{dt?WExN6J?hZ7JLY8D1!w%$w%clRyYAL|?8#T&<%xyXxl>b``ei(9dBD~I1H0J%p6!XQirs5jk(L-DO^f}j!m32 z*olEa++ z*>e|Ihy^7vz+3L$&M$~B)@TT{DdU$Xs(8Pfwlv-*e$ANVJ-6LqC!BSO1FkQ`Q_gxx z$CAzy4YY8&f_Gu7zPyV*t~H^!ML%R13HyP}o&)T-u7o4Uoy zx`y{5UE8$Ld}GAWqfV|fH4wGvcywhXAVmA|_1XymIpqm|MSy+ONjp9c3i2?%yW7s& z+4{pCwb$PKKYQ}&=j`d%$F3G2E;xUT?Xu$zXyn5!U9$$N%RSjoJ;H$kr=$@yWe!6C zTOWLu*Z1|elYPDK0f*YITWx4bc#qc;URNc;ao5nI9oi`e*&3n%7WC>yaGpQf?#EqZ zZNh#h_4w%!dI8RyrWY(xOGd$W!myV~zyhP&;st-bK(2X@ibH!|m4 zZff6MMs2Z$9dX#9Oex&P`jU94zPi%cY+zQgaXYW#=VAJKv_K?(A@>u4t7@WwOz@+& z9)vM@v+Ld(9i43o(p)&e7?UA|NTOO=CqNh!95LUN=-*p|r zi)L6gVrp8nFbJ{ZCj^|EH)kgB;yk_)0lh>x<7G{UK$|jtM-)1jBQ5KMq)Mh1J>g7$ z=-|Fqjwf_kd6_95^zx+_rqkR!5<(yuFJ8RJ-h1~QzvQ{6o^HBy8|Ex;z0Fppkh&X; 
z+{6YC9$}fxDay$u-!xZA&S+U8lnn^n@=Vx{82aK^^gucGWij+1?M-EQ0U+EJU#_hX z(D!!j6v8KsODi^fEyvPY0-1CrMCwt8?QeVSvc3IzNr{j7kN}z0-N;FTZDj3#*-SCE1z$l zYx(JzfHA$-68+M%_661*Bg*tO5ow-@TtbuOV^C!~?=;$`zq_4PX3EC~O|qKgEW`__ z!;{;?z`~Ri8dy`mu*1q(nypBvA3X#i(jTmmjhph*b zCqM+_YHz;zj?J7o(|^(X7j*54*+0uQBE>G3N%Wn!U-Msdjnk*Qxr#CGuD!EWl)eneD52hfptS!W<1+;GAdy)j4yUdzPY2qVt! z(205A1vX&NU}CIq=NHKmckxU58Tcm(p}$U-B zuWhp>!9yT8ikhXl&mtf*XUwp13GjO3qT9xrdZ8hU4)XF5~ZKw@HgQ6no7`? z8kmF3@X{Q`Ph@RK@(7jDT`l2Ht$vtis~MPR>$$$Fekn{I+X2FCBSB+o6Yw-oz_)KTug_e|hd;5-rK5{>k$Bw|ru8u=YKyVSU|7=Rw>3@kiP{Z9{0##cab0>Z;% zP4TPM7=9E_FH7wbb)qYD{HH2n!nDG_ot~+$q@{1G@AK_y@nP_zG2teJ9)>>YGTvKj zmdt<0Sr36B0bz{548T#dB!DSD73DdoV@k5#RSteNTrZu(gCWWA0DL7RFlPiuO9^>= zd+yQDfTob*c>%nH6M}!hBd@p>7m5|&LJaR-TMPZAO^x__2(g*O$!F8M1SqxQW8+o* zj1Xv3#_yd3oJmQN>C4;iT zQ89f90%G=YhQBPF)`dOck&3h~U;9!-pC33S0-U!IJ3TSF=2Fy#HrOp{$&`KK{nSw?GY8x1L zhJ~fT0O~MFiyejRmoPptC-o2iQ(HOa@6+6-ApCffIH-43<3bywtG;`!ds?TcEn#}G z0HtpP%%cwhbCqSK2IWJJoO)+}FFzi&oBM#1UeRfSzA`Y@W^U`tAZ0ProI#ar%h zux-8hFuE3(h*k8VOHdT91;)Tx;dg{TE$}f)>J#7=A!Qh!$Iyf}2%6(~CymV0 zu~Ms;i;&q-G-^~!!27Y#Iz3a`x8p0fiJ&Vvket~P5F(V$wTAO)5PW&uHxYGS%EPXD zI93|sp?W2jgt}ayQJ>Iz$DSS9!^2nBCaziGFwYa@{0!U8EG1kimJDU^uD@KBe#kiu4^#Dw}=VM*m7=KzAHrmf0?(Zc6M zkaApBBUbp3t?>MK6+bxy+LZD8+t6QSnAA-E_G!&A@B8%~NLbJqLPMa0Pr@q|b$UtB zQE`T=j;Nw*=_tgvw9W-R{inm~dq*C5gd=TMlLQ{?;IaCb!P#YYR6}zqBOy>!h7TX^ zX?a^}{?WPkdhLfmgp)1bRD?Jb2)Sdo_9VhmVrc8w3f`%NI#{wBZEL-v02!{=Z4w}) zefsq*ql3t%}2H7osZRGiWJh0fwR9ELFb zxqgwJ6Jz>3z@Hj>klz+n6k9KQOwVp^XE33+Wc=YKK^8Mdc!~=N&GcZo>c8i1_-4!8 zBnBTl5G;gZx;#F-n4gKgrH=YJKj`OUck=%Jg}71I%0wR(G3t6n({n)J6sqm$ydMh! 
zMFd~nFYR9?L^%xa9tlS+S`a3xYs%Z*PM} zp;W+#m5+?aU?%KFNYbd36ZNYa#7c8yLSJ-xJ@K{h{*-Dpq1epuPzRO zbr=FsZ)+0}NMdw`>9r@@kXUvzvHuB^Cz~Q&${-d=vXZ95Px_L6fe5g%(xSTci=6Sq znDvVUg6O7*p-20xTB|OVXdP`w&x!U|w?17TFL4OOA+T}?sJDsbtCt7OR{NoNI%3Yq zvt0Ioc!@(G4uRH#K$|jt{D>HW$+k}r_B%bvGm-8`xE%!!I zM;A%ZZ`CDY{64OV)*mtLXe}lkZLhkPWzR%{gpMkJkAw*o)f3nH9Td%rn6T7*Aifoc z!0JIDn37}t8z)JX%yfCcGXx<|axMtt*%3YPQ}IO{0zW+jzMBpG^zHePPi#}hFQzRg zy#zY79Gxw@mz{RTC2SCxit@sfk61B0wT>uc3^8#v^1sF(HP2WFR04JQAM8I|8ZBEg z`j*?#`Tw=n{Qam595@h%7qpHT^KU5>MAxj%>y=qITJrh;m$j@{tj!q?(PuW-5ml13qgXrJHfqa zLV~-y1h?Ss!GaUq-QBf;uQTsFGqb*R{=r&(?>!~Es!p9e1;i47nX2@n6iYm0=pHo@(sx~-mE-U8}@57s}_d`I|#b4yr z0sKo|X1-Ui8K*-bXN>`C?io?$+SP%mU$N^~?Oykt9NXcJv0+U~*A9(Pp5~M=HXT@z zHi_Xg_}tGDoGL^vaQbHrp6MUon-*m~wuvvaZ}~irjNLCt3R76-r~oJU4sujtvAo&2 zEvt;a1iF7YI!jhdp#X>Bvo^ft9K?0~9(*sQLUSqmoL+R(G0NHXyD5VIXdeU;K1K3B z*np|7KqGJElme))rkZ?G6gc-WowEzqPld%aS)2GX^}s#<8S%Xvo6noIkw6=rnR$jS zm}|AI-_X(}{$d7qtcm1j$H1D;n>xvRx%2nLpXwMr?hfe}M`iG2uSBWGn(TeQjnH}p zkG_Is0I!0u6$jFC6hH$#IcbtOy!TS6u*UubcoSxr{*>a_Mi8C$!9*0mjV%s7j6hB@ z(38=Sv;@=4Fj;V1A`?;1kX}QSd!zVUH<#NwkxF+@(e??S!&@SnU7=M&=>WwNV$dfZ zY(SEdT0-5#G+_V(B$(yz-w4?5FFu)yV6RpGDB3S%-ca|kWidIWm*x$h_UH)<277WD z`NFOisTl6Ju7&yq{iBxrnggP%8jRaNEonL!%WjR$z;!i^m zYJk;DN(pQSrF?L>%4!@P#)2EQUTt&edr_Hho2+y#@|~*z&uFjHBxK&B&W)GBB)Y;} zr%~Dg1!$xnqHV8$Rz(T!Dn+~6sxByS03=C?YE%a3R55HQuCKY>gYorrTRm9*LS#j%S+{CN(4{6$xBm+84Qabz@t* zU#DCtZbt~fYJLM4u*nC8BhKZ&7QZs{QT%|l^~)g>L}s1W(yCzi>*(Ilpkh7@Szzft zl5qy-CN;ljRTvu;q+(Z6jVOGi>!ZcGNlLsx%8>Q8X3EZ7rK3Z+$a-lbN^f*xXx=CBLE63+O|fva-~zS`)2g zo`ddhMyHS@A*M(CCOs&%penvYA!J|aL&RzHQak()7eEVyL6P48H?1`Ty~2zHOGyo@ zT?2Q8A?#dRIb1D?eK4l(YS{L8@+X;;Bwn{1;Wjr-yj3=oUKd?wsI+ksW^PVvA)@1X zp53ha=Ihoily5h+`zb`>a+4@A^VHvG^-U*He$-TI;<(IYoVI+JE~~Nh;ZtZ5O0zJx zd~&Ac#V*!Sf0WMPdIcEEUz$RrB_}K|c7it?NS=qR_&g=^{Dp05MZr~a_K%sqE#3uC zr;7=&`0|bQ1$~wi={7l0{(}MN?)Vu_Exe_EAv%V|(R{Wu_&Y)rj)yia$Q^RAN4=~hR~*AZieZjRaN6LmpkOMwc7K|q%O z@-wE44py1-g{J9-Pglpg#h>F&IzX1TcbfPtGB~x(?Qm;Qiw}HAKNl;r%muWkSANl{ 
z>7K@R=-bPGaM<3^(N!}~G4N>loXAd_Izi+6`klajJhSDlN7!kJz=*{LI*$dqqYYQv z$I@4E;=^@?s@eLR__WGydJ*#&Tl+y;$Op`V3!EKs&@fjPO5v+>jGYmjgnKA|1gy^``K%1bv~~>^A#9Tm?ptk(~@oa zpf&zBC;1m@Vup0H!$F(E)zaAzPO*Z45{p;~8_ZEO-xQlJbAUGIQ7^xdTX*y}f5?1< zVP;B=lO+;#DX3})^^%qP*awDuvWM3$wcN-w3`q>^7diQb|*UhbAX zG*=Ht(Gks_#&0Y}{vhlH9a}%Bm+Q0G=oAJhalU!k1cM3;3f0zZ%PTW4iwn5cVr-4b zk5T#8eKJl3eF#P-#(HXd7rI7GC2yU6(m>!@f)v^r5j=`%t$+z9jyRK3Scfq85M#3d zDOMWYXohRen6-?KjWzuE@lN=xd7A~p&Q0U1%5b;{jnJE-VH4*zLi5*%dfC7<{dHxq zM7QRe>O?C&TITgm*~h*4;y27Uq%Tnovydy*^v|hR^)}NSJ?(Z8kBB&(X{IV-FE@~h zH~ljd+4EN#C)*zYp$?5A<)_IMoLt8&0b{M5T_bF50*K~!M%BiUuAnK%d{E16y0PkF zBQMBp(Yp^y;M7r2O*bi$=2r($TXZeiKSG-TKr#Ab;kne(OcgX^y|){)&apUuS@-qm zb-35bF%-3b8eJSu!R=Fy41nDFtc#39<$oTTId?m?@(P*rGAH2%Is^F2yz~qZLv<_F zpJ20n>IGSga`qwwKIzaG73do7LyNC8SUD}*E~|ESxDBbbooq2z{wRYr3S>Xs)iRVO z@t?J|EBjy*HVqf>)NNRgb-gm9{yBa8(eT<-`89RS=})U(oP4=nlcQsj_f{Bvg)6X= zRi}R8FQw;W7ANI!0?dLHBEo<$*9Zi86bP7x*o+y>{*EFl_!&vJOPdm}| zyufpH1Dw4ZCw?D_=acd(B6_W?2)_C2hagW6`V{n2Cmkzk452xw+Nw9MXPqLXaIDBR!Q4>|GhTmV6Ri`!dR;=o-g ziZoKvqz3F)30HHgrl)EQK zpN^kG9vMr}>RPm1j%vgZfBCAKp%Ls5Z(w}ki?t$xsYrS@0?pq^@Ykxu(O}>E8n0}j zND|5SwXd&18qIC!vw+E>&sTPHmATz}tlEejiuLHIBoX6bkLM7^ZRUDaHwR@_2m7NC zIbn0t8N)Ep$63w_s6RwjM>DG*a&x~RTz2{^ zqC`K}K7P2?oR+zaItNMg-O!#g#ZKs0Wm`0@`7zgVGO^GzFDs-jY1My|P!hPu!%JGw zY&&z{f4Rh7cFI|;R0LY>y^Od>Ls-4x%c$R*+C6q#hyh1%E>8tuw=hGcH{_%xc$Ct8 zelQZi-h_5-)vEcP7HXYj)~VD9(ps)g-t034qXh0@qE)!=kN-Sf_rG5)vhgEtujpR@ z7GS;{47{M?K~7~n*UMjrl zeo2wN!@TXZG2`}pokBdqW%M;{{KgfrB{MnM(MsPfY1Qvu)Z<9LtAUk&J+mFP<-MVj zS#z$aBu8p|DdCBqv42J%UEr6NkU`IF$QMKOk^&j(AR-ZU*C#gENLSOc5bw6JzI{pT z1+4CSu^DnY+q8DQgd*g5wZIj2ksmyuOCn%ht(^H#Y2dl)PFHOCr-PwL@jXg_bsZ4t zzJYOj9z}tKAV|5JXFaso;Ldw!$I zhb{k!X_`P(-Inrqfqid>I|9miX2pibWXik@VY=D4m{zuhQTw1MyY8~g9>%VT0xjiZ zbDA#~(-;C7wh}FJ^?~A9x7s)`DLI+zv^IW24A1M?vms1vs=k`}8RG=1#yY4BG_7pFZoOiBkNUI#o z{OFHx5Q`oLibk4nD81gfc-T$B%KTM5B!4-sboeP9Xn(A8OJhS= zcQyo&G&uvl8n{kF?o8zTeET?AXtlf4gK*~57QGhboP5R+`4Kk$&_eK(XMCbkdHOVN 
z%9zI8kCaS<&?pPt658RDKy{>akU*5tj@5R=ITDonH|l7CU&6hcAry{_i;Hvlau^w+ zzKwGFnY^B2E%;olDD)EFpUK3X)89Jx&9+hosWAV3StW9y;VLr6GCyWs)#%>VOo+!} z1tot|(8OX)>g#7MgWY;Xy4QxDyeeAE<;2V0JF=3WX=dpv3T1Ua7S&HSU>cj=fX$OW zp64GL*GMtyNK(IS^zrh}@=+i}$b3gR4fH*~GnzSo!4R84M!w?Y#CWH1-k3Z==%Q=+ zW0$Lt1#ReKzuZ&*0Ul5H*zXbN$7Ua*z-xp`TB4Dg!kizE?k5X>SOJPi%`+rXo#7lQ zwEzu`Ic;z6q=2Q0yHl1RNNNC$8?GZpZWuB?Y>)GAyJsopkP)%7c2@?>^w48c5(mqZYF|eJ$ zHOgeQJU0I}l=kWqH%d;C3WZDrGPHKTV721Osy3o^%Z4Z@G{Tpy2Bcddb5x+RCERm*uJiV6Y$r<=>@ zBF2DS0VvIcmE!i=z7QmuNN-}JH@2(x(12WuXVIPXdNojz%Am47_U6+@zI1&f}Hw%oWx7R7-Y|6bm+NZ z^f+|K(48<8bpWbnu}bDsG;Ts#{6>xRJcNn1V5$jzuy)=p+1<_5O;kBY+=KSJb-+5}d-pU6oi+-@m2S|au9yc*VY1XY-$NV?C13kug z=NBMD3ay*ge9B+0bKdm#poq(cDA5~*;xZk6rG&D!h#PKUKCmHVBzcBlHq>%7a z3H6DGS{FVZYmJK7<4k8ZM-P4+$7ZMSStEOB*-R}B7L_Bz;-f-@t^LBJq;jUg$$Rmp zhe1c{A=53`d|KRqhejb89`quw*W;?LP6O{YiTj_ecY^Xmqh~bz4xexDE6M zO!^!@-=gv2-L+j0F8un{uc@YQ>C|!C*l~5Fh#9!ZcUE2nPOoQ2q{(Lpz-^+kR(F4j zqeQgkUWy@$t^(4)_JNNdqIH|-8G?Z?Pq$6^F@gi*WZc1*uT1a2pY>Sj-U?C&s+=(4 zgVG@rC(W5V&rdmb9y*&Uubz@-n`+I0Qm)DZXqpTokT6S>RH(vU2~a^P0VT zuk~4OWP3?7j6rY2b#wHHJ*CRd{Txg+p+74+LSY09a9^77i^+W_A}=rRGGgZW_3C}; zz~=-@LL0{}xtnXJIluEG5gX;UB4wfg`*#$s_ur#uIf7$NoQCU%8sFlImnMNavTXcZ zhbGDU#Qm-W(@w@-xmZ<-IwwE6)`TF2L!$Z_z{#1(98Bpzob3x$K_=;wdAL&}awAGcbrYE?Gb`RUzs zdpN0ir5-QhMFMVm-UP?85IaE*$t3KhB6oHm{g zv1>5+N+mR&HPKmS&FHqRCIkkPfk3(!J-;s zXmtNPvs5nawv)H!h6;4!7*w)ec9hc~RdqUMEL|?P9`lLqg0c=hVndg`-(?3SW)0L} z!a^2n46oLl9}nZNy1fLuUCoSeKF1G%2lT&U@wBZjx?j=zKE6$?xdR4N^-n!zR+%)L zXqIbfJg3o$h?GJAc`1`p^x{;wFi79KkvbQ4r4bx=ybH1B}ukm+ID!KQVNV>bw@hLn#5e?tV)iVVFBfNT7b=LF+< z?%1Qc5g;FE2nR&}{n`5CWN;{(zeFkch<-hV_jr!dGJQ0aPxR@8BR-cgjTk`->Oj&R zWf;}X0rccGdwJm1zi=ibZ0l~kZh+kC_xG3o_3J`#K4cGQP^0acun{{IuH%Ch?arQW zSF$>t=%2{CUr(8)_D^FI)(Jb!s9JeqweC19Zu_cQLb4f#k~SGO{=KRl)N-`~jZm~W z1E7tGm(~bJF$lS2*MQr=!hw2>SE_*55%sCiazqcAuZyrpm(;BL=aCFCT_U4V0~_MR zwo@BYt8=x`j(tvyhvWZwGiEp+qKBHFBsez?SZ9Hx@y4$CJWkfJ{^%n5Kqc+HM`gWQxHA+&xzmp$NLf6~a z7MSmf*NYZw;|*-(p#HrA9fF`ckil4l=bPA~WW~>Fg@rzOs3QmLgE=0hELG@3saW|O 
zi7H9cooL)`6d!T47n5jE?_iplb+@P6B|O>IBaFw42#a*u`_A#`lBAo09y%RBPl7Fg;p469A$Fpl_(a`EkJM`(o4Jei=Zo7v3yL*j;djJpzsOSpF4a|e zJW<+%n9a%GBIQzNYvuzG*cnMGrs2>sKRau--$Cd7x@dJ%T{>{eZRq0ax+4+UeZS<& zJ~TcaH#+*tmCK-l?Y7`V2&DjbP!}@JTjDN&<1`W>zWnH0Rg&{ONb(GjZ2~z*BZU@c zw*B+j2h6w!1{?Hvp|8NuaVk0t$h6S)bsxPZnTpb%$r#mHXOU^#g=Mx4BG)S@0$T35 zdO+Y$TwGk!i5;5K%F5&zaUY{?Bk#lS+O6&J2D>YD(Mc;`?HQ{GY1>Y?G{X@rT?vel zJ^{KDOY~OvoW~~Y+<9~$vqu;PvtSqW_|7+#4|233#>)Sw;VQ%ss@b2uk>3Z|quh^cD6XV*q8vP#p@#9tm zyZPp??`k?aj1$UF8Kaqk!pY-4Ou@;LL>yX{Dtc^-IxWUrsKc(A6RnX_hHf)X-0m)o z;l4e_!aBOTx?d(_`Fq4-l;`EK*naoSUOb33%gLtTqYK`1vytLmd1`?AfnO zqtcMksq30*ie=J`w}F=|=bH#%n3i4QV85^xoq+B)w2!1@;MweGD})tyOZHJUz9r!w zYg{jusDc(Bl8)7a3=4Y!0)CpZ!2!~{`?$x!R4Wy;>U5w|FJ(HaF0b4jYL&-OtlIMj zfA_47C&>6R#j~=qxuM#^a*H!?VJh+lUuPoSWL#1;7sz^*&NQK`va)j6m_t%lR@VM3 zXI`^qf(r@Nb^JdYkX!XBcRrA!q_jRXJ5pSM40)1h9M|JagxCW9P`jx(==K0VGnE#k z0FV%w{3a55O{*Yl`SV1KZ{9NDDVPq41|IJY>81Y3>P&JmQd_99{zbuGGchMUor$_z ze>9y}@#y&2?0&$je3K$&)<6hZ;NR;833y{V<9r*M+2N-G0A7_Ay*WyBW8YorBtadYZ*q3N6KD0*| zVt8alXXhFX4Grr)@Z+^v_j>l!<3Hw43im_@=M&JkPGkUTw}b~sVB~%l=R2f|#TkdG zpPcUDOf>ZX`622(lw6UeL4V~PmU7|3<~R}ap4S_|1HPvMx{VQ1mIT(kx3@p<91OzFgDGK8vE>QlA3`!Go6d^?%?<%-tTBh`lU1RDRBTJla*hH5>VcVEP6-diW-Op@zBsf?)p)<^ zJLqF2@aJ+die!O=EC@8wTe*Fn>Bu(oZxQA?efkS}`xo*EAZJ3?Yb=m0Nbl@BryRwH zUl;xzeor|Q7FQ(azge4BYjncVm`LC%Un49QF0i;f&XMAC5@;3%5n?IeNkCjRFu7br z+MN?RQ2pNU7ooM#_|K=_>Uh}T4p~2ZR$UrzZ*L*IjqU(-4cE{+Vlfrh8nc_HG;Q(w(SyhJtj<*mNb9B#>VE9%L#V_maXihpQ$TZs%H%pcA04k4Ri|pUd9U@(9I3K)+JfC8~sAN-4kc$ywcN%5VwbhUf{mHv?f7^=&|I6iyCo%_NP zmmKta)}?&ZwGdNrQMW7(=rZll5}FfK>s4ZH+=99-Ab*z=W5*hV_0IDw&Jpyg#45*i<(4Kw?~aPe;DR>zoNL)4RhD)=ts0LRHZZbvot`8*$ z7wp8bRHuniL}IAQwkh$0MW<2iqu?1~%iUebZfp5?!byE@y(yC!KtZFUI82U6I&0Q`ujZKfanb7qD}oZ49eeD=tQL@n$6L|^PC z*(Aw48yfRbM*dmy#bo*Jpjo`~QU-Q}Fq^q1aKNbqV8;X{56%59)5kaQ)YU#LR=)( z!Kq5)XQahJsJOM*QFy$_+S!>_Nr;#bR(ZWD7XN)t@mPKn9LhCOyCc!B_{6me(0%L%Y9`n}8uh!I0_oQxIcxqn*WPxelE-d_CmxPDh$DVrr z=Q(<_GW@rmMgNxf)x4I~(8XqcA^Nb_gvm`8fkY1TUf#!$AWQFRb95p!!1mp5r1vrS 
zHh)^~4F*r?Jy&1oy-%HD6b;6R)4HrXanXM#Dt;7JvSbT3gsR&dOh>d0VE8JI-ut;8 zLCzo6t9-@^?SW%B`hgY3c!C6P1Ny%XtI_#y#L~Nw9bGq@iVpH$`&y_Fy;xT~k~f%5 zzkR<7lK?1-tQz(riRx#SSmrK5UlMSig#Y7f+e!*tI=uQb5~JYiA9SP0K3QuN@S;3R zh#2I}VdfY88WkFlf38Y<3K-sbjSc*glxymD0wAH6`U zn#@*JB)3}9aTf=~+(ZC0YJW$N1MyK-rrZsF6&puNBz=gdg*6k)<49iq?pMpt^K||^ z+rZ^CPC9nE6cLcLM8``8QVpn&eUR*Aou0!I#kn~RT_>5HnZZSAiVWmmd;A|;M*eHd z{UIGE{v0(yV{W0ZtZ1ip+v?oY8U-pG`)cA!eN8I8gC%m~fy05LAvS=5z+GU6Et09C z5jdPe@QLYH|$W9*sDl-xm8s~+}`U?`+(k!bL-i$y305-=><MaJzqMBEaVC*>NSP}U#@r7tGcL1>;N-;jxrYP zg@hn!LFiw6?_WM0UH9y<_rD~P$L3jIQ`MOUM9|PT5zNf#TOkJ>tR;`>AAPbPb?Ozd z65dkz_)+s2oXv2n)-q8P7UMeq4^%*MNJy54^-u#83txXNIRjp9sFea_;6F#kr>Pqz zd9FV-edTbwiJgl)U$%Hfq>-vabvK9o4WQJs&AW?f zL=40{Iq0G#sQOnC!N)%JBq8Mm)fmS&1BKlNPjSs|3@%oE=7Q(SR9!79@!Ic#`7w+z z_FZPf)RtyvtL!T?#qx%kV7Bph4WCCWHh#mHe^L#E^5H`JU=eO%^W~2a>Qe^gd6nn; zoK$NwH2M;Wl%Y!6lQM#*(- zEvoRi_#?psmV#R)X66?F`)QQSdlNhmh8lFg&k{y3^@Op0+eGli3EW+Kd%N~bJgwr) zc@lKryR3hppj$OJ=3n89%nc=M|9v<(ykW>?{4r$+Z$C5q)ZJ?6AgM_nO`6gS&ec%?7$4UN`sPIh45U=BB2O$h}?5+kHSROpJi^(_;b+ zHMhE*;N#6SZPH;02B5eO>PUK*v~t=>01EB)bs?@D?!G4oVem&6_)4)x?{4WqyWsjC z(Fq{mU?JZe(TPG|ijWl_e78G;YOjogmH$A((DicaMAHz^3I^X}$iGGMEE2_ohRbHJ z`tmUjd;I8g{K_!>a420SCPq@`K1Ea*7|^;m@Lmmyzof|*loUwRav@s`M6z3Kk$H|R zh$1jGBYfgbLdh$)5=mdGVBa|1(ByN0bwfFeym6e7jsRw^d(!#xzso@RPXaCdFW*ds z*a^^a96RpC77?#_P}QgS&6X$(d_`HbTM-c4jEpGsS>hQQK4O<<95T}_YuhzfS*pDeV*oP z%WR{9S1(6=cjC_t-lY*Cl|U)M6~nlhf^wbC6L8=-hRVA}W!DGBp+(IEhMTtD;85^O zZpFr*g;n1M++3;t@i_iLkXL(UDDusYr!u^Vq54_0`9p~NK$e(ki7Fk&HGDflF=QAn z#ej}J%csnb)1g%<8_aHhP^bO+bhB>JfAu+AXsH8%!lxo~+1E+%atFu6#Od2Ffk2^+ zL1gHOu?F8L_Se>qK93x@=Yj$nLC6qBr#zlW27KN$S*{f)uYwrZzashPe7x{{tEni-xqd)GLW)mFz$wp~ zchh`_8S#buzmF1=g39;k!sBm~pDY$2EjioP7puR7%lEYa2z15t>QlkydllVmXU2B7 zcz@gc32OT^igGC8OPK%2AtAPcyqAlXah5B>grepuj+y&|EL_gV>~mE{um2qBXMnk2 zCSNWyJNrC$faOGXrS8jIstU`me|i1JaA?h?0q!)%^A>K|44Z@Y$BKY*$36=W+yy9x z$+5he@4mvx{E1t}hP)5G-`w@TV2L&yyYU%8k`nnjjmr3?qS!=@L`6-2f^RUHgcvj{ 
z>e>6A`5BO^NG)VOSr{x8jj>trPl+F@INb+pgMMO4L=vOySmeJaFB?b4+%MdtsNK+bZnqn^x(%zf9^X7|3y59HSyM`yFC*mQV zhV=N{W*%ech7R$Fv~~MErIz?ls54N&9WnzMqpAf7H&x{42b1La)ZMT(#X&vOUzIK` zEiEe_Od7ShxQ+mU&1^^9+E6ovr_?RR^^ zWVUP6)CtoM12UI}fnoz62n8iz!=QVrS#){G6o6Qa6rB+?#ij~?hjr*hF?Ht%qvcNj zR^J%b>g_D|=0G-m?N8_HU0sRE$y?<3LnG{672P@6*=ygi^|2vp7C5Hb4c(yJYrx6g zej)YvupLvFn=65M1?kPQdd?3xTWLuLueeSm&CF<6pb};$C(}V_3}^X#ZvMy({&HA6 zJAt6%&MC+;1ed3z*_pWzEc7XJb?t|}{YfrqXN zr+24NCX%y)lZGbFn-!B;sy@SYVah(W$6zYK$_Y$RB#`zcwZNq`=5K}u{Enrn$%Kq_ zkAD0Ye&<>-Nc<65r3xW^hd}O?`&CTt#k~Fb_P7|e$sxfUTTM-kCv%Eln9Q>Wj-Vvx zlCM(qe$=2g^;UR%JBMX>NsngH&+T zk^zLJPv`Ms`tWKgs4Gt7q@efG4|d~Y$zD{6V8GZAGpO^dI_NS$qqmc?Ng^1X0;=t$ z-{l%jOWtP4@#;v<=mPdO*Quz zmy+YU`ub5vN5|!HH^mML_Wt&G{unGKcOg>o4dnWKdwc8C;rHlbfsZb9KshFt1t3Si zX?rEtA0~@{UWEmMxB}DYh2Al%3a`+pyxt0;F;VEV1n$`Dbhwm)Ve_v80v_W0?kjLC za%%D|PR4s_h!nS|0UqMAheEtnEhj4V#%_o^bl^ams8n9xz|kyG#VIuyHLem;6O>)O z=QE}R!{<-mVHP4&%3;i+TedN9Y^2TP`Hwf@rRM%vcDeqAJ5d!Vlo!L7!P-KI^D&dqVE&Pwi|Lv zhV)JmFmnAh@l>6j0?nXq%U(jaZX035s~hTzRG<%_C-GJLUR-Lt`2gDc{8VMPbA63Q zF?U71q4gtiI^xZ{kbwPXE|U3I%1A)zsv-LUq`&yHNB0BYn_bVUU0;ZT%na9E_#3}b zc!WE|bssRnvHW(O={oO(5PW|}snN5#l3iSU6}VUN9HLDr`L4{rv>vL&27v-~WH*!t z!FT&u-z~{X6`NIyR0c~(*aYR{4943YeB;Mj{FL?(0#7Z`bHaUAJ|Ng|bg3AKu#Iwe zJ53D4VkAh=ff%7cf?BBGbPJ6n!~D+6>~>DI$b;65qBJT?do(+HsczqzMDAD2ZFUSe zsmH@FhIKZk^e5~3$L(m$i5oEtn;f%}d;Wy|pW?_t2_H?2jwVvPUWDjg02mObb&5b% zOfQxCEo!`v7y=uBsHVtP!)A)Vd#|jPtJi(b%KD?x#eWLE+=IIloFY5+;**^iEsQjB zK2Y*KcL$KXK`Gm`M;j_N^c+4M1< zpZ*2?Yp|@1txY1&&g-SS=Wqqn9856u6U=a~wNJ7lfl(*Y>P*$C^}Ky7uMGpa-+>G(E7^(<~R;NvN9{MJ>`etf-7)TLmeS9e0>y!~1uVO{rV`DoYx zDjHfC0M`l!4xgn&hni~@Iw`teM5&{V)nigdBo+3+7&@Hb86&KN+3HADJZU5{!zyAK z#$F=Rfe}FCngU@Kz>Nw0i1|$=m&4T6{3N9l?%gR#Ef+T9$71v`Ee}-&aTJ;W`Kg&7 zv@h1s&i1Okv47B8zA?BfQ<*UI=;FWn-n|j}HWY(P<-3q6w7q_!%cB-Anb8!@#(Yy7T9m_0d5zGESB< zxNfE&vkN-{B<#F!xY=gA-T4+7Ue<@)5(bHS5&%UMRhL*b2CE#CWb{T8V$j)3fEa=U z%ms20=-{gJPA!#=SmvBx=&P{syL5yG$MHPKs=*mq2sA1BJt*P>v+_X?EgfO(#HdmX z`ix*sOen*h%Y`Ew(u0SalOJ6enwgDdqLWK35(gcm4{lKK8Z 
z6Gj$%K6KQeH3G`SC3|#jT%RjNYP=Zqpd-E2<2QpZKvoR~nVdx03uO#sEi)RY<=XLN~HvjV=x_Y;~PI9lMo9xxQpt=Ch*xT%>@^(hHGV@tg}JJu~ITUHJ2SdX=={4@0wM6+KDVd4^gY8ONtBHonKUJHEYJWg%ES8U*Y+ zzUu_|l{-}U7%of`{R>?PVtoivfBcFPb_zz+~Is=VeHeKkzm2V_m*93BrdzkyvhYe?c z5X3#VVO+5!48xymxQrL7d(v63en_+?@mivbVGeXWM8UC_D}A`@VYZn>HF#iTW&KRD znd!T8*RHIr+{zh>$L*09dMY#|>(=dit4@UD*!!frpyu@JMhOv~+{7R4i2!(HTw)22=@TdX#5v=eP;hiUDTRUV|C5&s1dd5NikHuHVqMEbnm(%tSl)5g6SpC~4$OhEzd z@PdE%&Io}P(|7!F{oHLI#FNC0vd)-s(UabG0rYpBF82lg1Yx7*CfvO-EL4fHxjNUb zc{0CEFH8@cO%TYYv5o85T=6Q)BJkc7lPP~;y^N)jcJt(LoESlS54mIcb7DjG`NXYf zYBP5YW)+^n7Z_F;!)*m*t?IaI1s@09oAlSHZLh@SOL#KG#>W?(j#D5Gllz{`Wxdmf zWJH2D)xEpFsbdneJeG7kRxJpICMG^>Y-WGaM{6u>o*|HL{oFml*$ib?u{ucl@EZ#u zy!}Cov0K9LsMtvHmN@QY1<-uQhl&4LrWlR^P;Kf-e8c&B9gl*gVz@qh?c4&f7xlYyd}1=tr!eN zVM4^Dgwo}Zi59LaEM|B0|29Ai-Gh=Ol{H?~ijoq`k{5aW?T!Gbp#%_Ed7FW(`x$ z7SW;Bs?;sC3V-LhgC@zc-K(sQ_`tvJS2vm?Ze*jb{fVK}rc_N`ojNKCO-)Y^W|t)& z<_8Pm8VT$h3C=R4YUjmhOu{Ob&UY&JPOW7?KBk?<<<%({&_#8_ABGebAD2e;Qddn3 z3-hZ`F?sm3X}xGL%X5v^D$~0N7k}qSf`;klE%@uRrQ3ZV#fv-aw;Pkb@iUTyv)bu1 zUnabxd`mYYFOPUq4wDLV6Fr0IVSk|86TmL|b#c|sH`=>ZLV~QtHJajM`3`pHdmSx< z@-)`&;zG$93@Z42`u!4JSneMT~@goSkLh4!APR~+l#?7yt9TQf{2xIQT&tB zVR%PV3KQGO`D*B8 z!9Oe8?itW~!jc|-3QX59(&w^IkW(P^1l2AQF;SbU9tGLXorA~JEYFH=ouorht6E3rt8Ad9+ zC8zi|lpg_gqiJ1S>Sz%QFm7u>z z)Aqi&ANn@THe!))R*wrUmGR&U9BN(q73BLi>fL^0>ePS9BNYG!r199qXZl?Y4Rb66 zj^Dw8#Y7TK5Ai$R!Px`KhwYFaKNXpAcE0z1>qpoHZL`Ix_70oRWx|2hSRVbx?H9eR zJL`=Z4dbI90CfaAyNeHk&UGVht$x~>~&SsCGeLDK?lS%a5kmOIDI$3^%8p>uFo|E3N z$CHA=L5ctt-}8%$ImjQq-V(=Lc{h9^0iB#r4MMR4o4#wSO-L%h zTbs|#Q{x5+s{S@rRX5;P!|>T&yCN|Z#?}@88O?M~I|#wX`C9AfUU+wTFh|oNUIfKe zYmVdr@&{iI?pD-|PX&^>+N(^ho<%*lJ7yJdnCgz-K6C7UtT|n)uJY%SeVAy!Dn%t1 z(8gysVA;kHeE)iTXIE<{6Bq9OQUD>tTyP!zIC>6!215xQth~X+VTt<9eD8)G_l9|R z*qj{nnA&ldC@CY)5JOD-fj}Ag1&D{cfxn5lP)HmX{^Ov8wgwFL<0Zb@Nl>Z9BwO9v z*l$%FXrHuVqtEfz@TG)JCh%XfS4*OL%{_lg}k z;2)-?rH0Iv+CRo@K%ZPQ5}{Pa#h&eXN&H7S2d%sA);k{8Dl2`vRW@c?UMRmG0$eL% zXfVxQX{hRyfIiz%fY)tIg7{xh{k67-Zi{9su->j$5nLu!SVH^E`oc&uS$ypz$Xnj_ 
z<5*=#b67veFsR+7;=d6S40YUt|Cp<1fZySui|am>X6Bd%u-r!|&L zVomY~KG+I@$j^(>%$jO#gp8!9J)m%(?V7p(4U z?S@@Vnabanc`h&88^^_BI_|O^gq(;;ET$Ia? zXLop4h`*jFt8_)hV61D9PEUL1lyi5<&tCF(vgsu&Ta0YF`{|MlTo+9(idh!iXx_gW^JW7huAl6;;C)89h#35q={TXVbBZUe|*!$BmAx zJB@q-Mqhem6{YhvewKZ!43!eRTQJ)#uj*8LVG};6xxL@5``*v$2OmqIdH=zjWsaL2 zRhx8^*zxyUNX)WP_Txxat=l2jVNN!KptcY7@!oLWw(M}0=Zg-0)e7ijW&nd>!>m&> z>8*J>2d}azN0nxcE#6p$91ms_n#KJ(W_QLw1s& zOJ~1|BIxDajpL)d+n7$2zi?L_%ACKhTp^zjoTy96a^7^DV0HvWhr5c%Wr5#C4}zMZ zAwHb{Ct*&Vr~MuI-C?||2=P~)v+f#{&Y*AgN;cL=iUwwNJiJX(KEG^m*e=b}cAy?* zAH|m5ln5kz8Yxa|=td2ln4+Tg7mLC$t_}6Nlt=3BtLnaF3}id__&Q8vAU9pE)yP*r$qeQ@q?wJat$dH`ze` zOk#OGXykjn#FO=Xu~~Vy$@^q6LrhFXYfC%2sY3a2d*rABB3n`4Yp;=p&Gp3by)bcz z4w2h~_kKB~Kh%wx^*W^8+4s6Di&+=GprM8|G=8hcD08w6O5{`>mOZ0OG4L!Gd7aW@ znq37dczXNunLyD-HF)KIQvIrb9tCzdv6VDt zz|%Tu|R5t6~9;vx>_KqqmDYnlz%NVY?ytL92`_*9Yd-u)Li+rys zgdgo?trsFYH$E|+vkQIHIt!gHWv7v=OyM!R>qQrPA3gN(J8v`h#rjKcQEwrUHf0JU z5Abc#m2Ynxy@$B-CdurOmtM%d{mUP4QQyI{DJi;MNOryni@1lw$X#Fmf!X$tT}WkC zH+y^g#Qnzpc;1bVVtb=0pvJ|)Fk?L1p?RNlgoKq@GnhZ-_L@|gppAmWa^$AycEEvY zWv;E%)sKkJK)*TeZi? 
zkr)DuKLX`#trKwZjkMc+GZp*~_}Z@1jBGYVp~z*gibvntILy~SXh&A9a>EDxTssIh z_m>3XWI78C?6FNZtk_h|BnSAF!zH11CmB$@-~>h%+@+>$8p3{rVxuNA)#RWQ?o{YlyLtD z^w}0e#G`bd83~7r7c-!id{>@-VRL_rk?OyH-6|N5r(cN{+;`f)?ngCOW_7kk$qIVy zAoh6)lgo%|P%E!>NJ+U<(1~wOv8{+9CytHz$nl%7&Wd|UO zG3*Sq4e`v7t{05&aoo^;!XW9hb)4#^ChV-|;(~s)iRB2Y-vI7;t?CpocU!mS<64;t zTP8)54F((02~YCqe_bjq1c-nu6j#-UGE;yt?!eSf+))MD1iX)zP=$_tUjh|&_>fF$ z{)(fre7*Ya`z1YR@WDTv()q05=eDp_jR8U^B@qDTDA&e;ha}fKlZb{cBRa5vYQD-W)RNx5tUy9Xu15@3xT zKx|O5W>ZWE*%XOhg#rf?mr<|i*l6IFw=Ed)ExW?tWyyFaMmh#dn7 z$r=kkK37A&TMX*`NIF|OhhTNzrSyLd*)D0lW9jj7w}uGCDk?DLX*-BB8oiqqPRb5& zr5+(p)M-2#p@^Z-5(zbmKET!|){eQ~rj(9LnJttSH_IEQt*FP#1>`U^jdpl5Ns7zu#* zcyBep;t+3);E3SFl|j?Vq^W$oWqxjMe@V|_P^r+epueg@U`Bd$wi+W~7&W&eBqBaa zYHbF-iI$$EfQqa)pQ+GeOV5784zKO$7m~%=oeul+JTHy<*jGWwT65 z=f`tJSm_SDC=f@(12UO5n>0CO;oO6x=Ak9dp#Mgnagd)rh-EBfFSKC{sY<3J1i8N* zV4p6#mN(h!sua>p8F(TU2m5C1vFx9Y77`PG&E)oS<@=UG}xRnnNkxLsH{?teev)4Y+|Ifj70`6 z(2{u3|Wh_n?l(##nCD=i1$ z?>m^vmy|jv{Da>z9?!W!(j5vTvd|os$tLgq$*^WR?>d z5U7rXX_z&^eSeKD08B1*&!xu#_3r-N2%6Q$NlBO*Tjb0_C`)x&`(>1Z$U09Xl z!;h1X_Sxy)8}a|+aqejm3!-y(`T;y2sjJ;JB8r^}LMRIq0~)Yjm6Kk#7}ZwR&O1It zO9;$Q+o>Gal=3k>O_GQZae6ZOgd3s9uBO`&C4WdU+=(XYK>YL zS)wkXTs>XgJEyX$QNDU?x`)|RtrU6o5@EXBFOz|n`BQn5sjvUZGmh9#Tk1Uk-&eo7 ztzR7PIImaGE0;j*)PFRlD*gVz&wu|;Cpk+E?{-*fvU|`N5-_QxUg_7a8)Zmi$Y#_f zOw*Pn#Wjc(Ecm8K8u4o$H8H+S!#6s80bw&8qXnx5IvJ-BPue04G*?6A8M?i(d9v!y z+iZaGA=O%$spmf+>L%j+L=2_L3=k@EVG=ubA(ArEE-`l zH|iZR46oWS&~aFpSqk!6)|!Pog#IT zV-x32v8q9a5>eqc6D9h#tJr*QlKlrokAHlJ*~dOxb(Zbkz9xe-vavweal1#`CO_b# zjpdn4+_+Ud*bHb1XQtdjul(m0kbOi6W|o#+JoQE{Ig#+N%gdwC0++<&?+58d75S$AjQ`>#`TzGF{!eb~;TD4cKiv~U)5&3_&ZK@F?1MEh zs3!d6o+)Wu{vT_=|E9{Wx=R9~KI}UnkdF1Ir|rDkm@oH7lP$nVsrtF^S=xdBpIz)b z)PIkTP4BVrfqtl}V#?%o`UP&frQ~(Mf?Hm-WdKiT=?6pQg+53dw zw8iqTOxISgXSq3M?JL~M`dP8oO+Q?nR{1+)7IA>5^o`WS1SG!Zs%5y_6OK6uhr0OCT*uaRj`{GRY^NO@#-3=aEB&rk$|F z?DPwzuhfb%Pw)Ch@&?JYmpCri;mS8GRWK(_on6R7dFIB2#)Vbs6FBg7q|LaN7CroQx0qhbpYg zY0H7a_=I=doCGVr#I4So=5{ 
zbBEG_VK7|yAC};dvMPKlgrE;uXbMCMhDddqld;B#Jf1zJ!fjk%0l`}1+z%)689Ylm zRfbD%mpC23B<~944#`O>gwX=}qp*_O;rDR_;mH0Qre<30ZuSty5%YZoAXI zn2fIHlp^c#vh*k^t=IbZhokh_`D6~U!wChz)eM}cK}LlmJar&kO+KZq?zk*OJAf65 z-K?IXr-NkN7Ro?LOnlXiaYzLKfi38!d1qR>arHaiCziBILycMCz8PTws;n4MPc|8@ z%~17A5yAAo=ps4BhrbUIgfkcQAm~11@mP_4YTiCa9>m{(sP+}wIyu~$xOZeSf}JkM zMk?sOxo~&c^W+oy(q~uyzUD@NSwP; z^XOJR_)M_>_Bwy}zO|xp?>HIZ17K#3VBS0}H)-8j?E2wp1BEeP2jLB?g;9BE_Bxa6 zuS2B#_ityF+(T(5cHR^VV7sxhWN6KT-W3-7Ze1j>+UHRvf(&8(?ZU`tLBdckYqi&K z1p}r`qMdbhD&V^Y2*!Wu zW|TJn7*=>}wlVn&+VU;Ww%ocseSwin(~09FTMrVxti9`?DoO23@7`ZIiKD#4m^dI= zB0RD+Ob-vHc2@S6xr5^@O4TM-DA`X9@609p$}>-kY&xJ~KD4i*}L_+*c7 z4s*8XTHpWskF^-Td?G!Jk=XXDv-(X)|M{zHdDa)0R6=WN;`xv)b4-d@%x_h&{V6Pp zaCkkb7aNI~YCz_|kvRSOnEW@_S!91cZh%K&eqQfIY!d>y4;O=ntA@JbOU@93#K!Sg7+B1+*UN=mX}hraduhvvWBe$uK$%qqa=5(!@fx(Ao& zlRTdu=TqivzHgtGf$)*Xz!E$|Jm{l!bM@w6{2m`@+CGU(}vI&-7wD#JDjOJ+klQ9*G=k?z$k<*}hNn#_Gz zG1w}dNczQvK6bB3j$s&R5xEidB*=!VQyu4z*;l#i$ajk|>CZIqj&Di(w-aS#sNUH| z_Tvegn{VXa_b8xkZtZlmc?5q8u`7{wb!*6f5h;){hy=k!#@SSmPmEeMN%Tg_>G2>> zBSY^80-Y|;QA>?{$5#rGr=)E8u6J);m~n^LG$DD zd{1fMp%#bz`;#@_aRGuFZvlWrq``mg>7%(``~~K5>ve>0_h@rcFrHM1-~AF8@K*18 zB(MdJ-71h|wqgXta8AwRAfngtRz*-!RvEdvmWl_<0Cuk=BGcz76mQA$j?s8o$7VQ6o+(sMohw_vB&?SOX%^(Bth>Sv9M$E$*BY#A{o1XRNSb<}$ z;6SOQVaao$==STaDlx81LP=J$^Cwh5N~OQ6UJNg#6mmM)wLD>ysk{|6cY1nyFc=_+ zCoersZ)HdZHkM`)JN9_ePbdwQlBVIZVIm{|=w{qIH{9twL(3~^G+;WwMnEt0uYY?9v75bL1BFO#)?N|*@~0A*k2`#$!&gvIT{wv$I%1R!{>k*ynP1y! 
zIw+NA^Gzk1SWT0KnYk6n=E#`Fk|24D!ZD}g2eSel<7Xl!*1);IGuDW44;*+Ie#LHkHcO4I!`nNR#wTqWkJ-etBsl}InP%nK6m&6Jg8bdcV5}S zDrikEBmCUo_PQ%+40ITL2ODyn!TQn&ScWcNTv}Ybx4f@8J5PnQW+e7gW1dD+Q30O} zj_YVo=?epOO*KZe0X{EIZ69mdMb^fN6WJg%{4BCINf^0Q&7Sfp%uqeHOJr+x?0@ID z*ORQF+A>ymuJCd*`e-8NC=Lt~dnZ8&Ot0P*>o@(3`_&#La!w|3|D_0JCr0`>mSw15 z)C2K*F~`B%Sus^@corg@@$q?I}YHDA;EtF~df%TZsGcc5Ctb7;}?4&I~!e;wr z)58<~EDSI}&-G#lP~m@1k!JXlg>@UlXhRn17f^ z9@7#PXrV#4)KM40BlsRC|k z2{yg-$uzqHEo72xP`Z+E5A|fBEPeIUaeN{d3(IykqO89 z2O~N{7iILr?BNg9^5Ej3M24+GE5HRH19cK;k6e8gGuF0%m;&XH1Pc5R_VqpZtDA3ElzqSws;QodIW&}80R5dG&8msEoS zC{InKuZ;~R83~H{`+eqH9CbdX8nbiet5`7RaV>m@m3Gesl}d!@cx8-Dq$QDCuzv); zAFR>>Q~x?Vmm|}SB&Ob4${?h?Z~E;BdwsuieBMme66xzN_VdM+Ly5^5K@OX6X*ZIF zkpB-9^S4a}iJ&uMBFn_RZk&~n&IPbZtM6F-_Aq56OOO4qy5FjaBiDI6v|^1B97n+= zr0%}vrrf!84R^teXW-ZczB|AFF+eW9<^=65>6rjQOb@qR*%MYwVv-!7;tkH+$BmQ4g|;q_!YAXU1K zdH@hJlrg)1(&)6AmsjZuYHen{B?{}H;VIYOm)Fa8=@$#jlSW(P_4v8y-{>yzv$mVGBmuF?tj~p5Hww6& ztj-+#>_a-YNtUy&X)aL~7ss>a3}FDGpeE?^g-~vtPT%mir`J;3-NO>MUHQo*H^BQ( zA7YwEz@2;qO85n&H(kd9w+wRmX_W$EG3?9<*`BBX4Cj=D9xUfjN*0Gmm(PhsQfe}_ zmXUibHjeptgIA-u$g`u@m6g_5YbRQg6Wj&z2E>YFAoR$v2SN$eiwcRuy21I?f4G-r z{~)Rd7r{BI=(Gv7;E1QeeuL{mBJ4YH{VX^P;gVLtxLgcr)~CQ(h{R#ER}N}wTFE%R z16RK71o4CV2w*x;|CN#;bVy5(%&4Ygo7eYEu|bPZ8u2XV3T=u&k=Ot|fKMzKO1!R_ zg>>Nc@sbtvaUFjUCvY+I%W6801j-+FrlS^P&;7W(<$+uO<2(k^ACO$@*8KJoemT0Y zwHYt8(R0J@JBN-_tIu+;P2lo+>CXan8|-|cvn&U)ZpUJZf#U2(nip;@(&Xv(2?j_W zlmuZ|hgRC6rzatY@tKnKt?;v<TNG+dk z`c25~K3!gQVapT%jFOk%A84#iNW{rpTZJw#87DGfzwFhA)&>$%RX^4gXmzMGSx^^;b*n%HGhKHF;KoUbr=n6cg(=TKawz(|gVmC)S05`)p3prXF zap>>O6qw4f*36Xz{}m~**O;b7f8k`fEYJS9>ERWY%l5 z5RpaT=XuBjqH;XP%GuFtyPO9J5qP-dy}JdyPU;0Lb~=g+xW-C@3mU$KV~E#-YwKFj zvxMd+fQ{$Ti-xCf2Q9BFkwncmI}!3l!&?c0SKE2>so{AryJd_n{uIAVw~~n?vNlf* zs&0}t->pUea}e+5b_)dG52UCS{Pbu$?n5dPOowIGW+jA-Rz(Hm;!7b`DG5+m@Z9~9 z_P4G`k6U`9QOu%BH}o#Q*p?pZiGA=19r)`;j{SHe(A$c&Yt+yO8J!??XV7{}>O9Mb zH%2Z^O8YJkXZ%n_Q1GqGA*+~+tl^6n{4~E6$~N^?WBv7OJee{DAhYN5$mjAj;rhoT 
z-pJG+)I{8vH!QXQlqHfR02A#(o0;}X2)94d;btRN9k>5f=}k2PoZATKA*S-cBcmVH zqzEk!&&??+b~@aW$b#x{DuZ8YjS2C`+#$ch@q95XX3t~K2!FF0(xI<3X$n+!Ra zK96G{Pa2uN1TxdFC{GFP{Y89}RC&n09wlnZg(t_q&S5RC(}Q}|uhzIwfNfksfeUt-OJIpU3Vj6)UR?Rxsd0X#?)E4{x2rLu|kGo?SO z7B4zZYrtGF-ES6w>%RTWNttnOH$;R0T?St1*F6+yaz(a@YZK{wx>sV?&@*;-BLetx zDeq5gn{Pj4D4)DyY*lG4+aF1M;ByfoX^pfhEJ>$kL;fI$Q>b-*OMH8cP3N@o^9!LW znt3T3aVpJ?ehe$BlP&m;XWgq+wXHmhxY<^nXZ~v*2T}o!Kf!2EO2K@0jqRVZvQqjD z1|{c)0vgo)Xn=6)HC!xIZf}`W-A{UOp?Zd}>ttG;6qLh`1%)a^BkFIDAE+J1jVqiL zLu9(Y`re?!i5Xgc+fScnHHbL*j#^^Emc+JEveN-6jpUwDSZHD~*uWuH(I*wtYI*MM zO47ELq40Y+XQ-Z2og%iw&QYlbh-C2R?f!q0_eoWe(gpxNo=NV< zfL=pTF#WD_@1fw6Dw16#UK*gc7!hqeTcO_wq31tIk$l&0b3bp^)IG*R3zbsL;`+--j1{u4zFQG{g_Bpr8{6m_ zI!F87T>Nof_EVNsb?T}z&_WvQ7`z!D1|~Knx!Tk?1T_I{4-#`@oS=IaR|pd!*y!I* z)~S$ab0 zsQ|u%lm0H4SQ$0m?Vo)~!HNxEYqY5x-E-jR+FFNee z-FQr?8mnV(GO87VY1EaR5#X+FPTH|Vznzq7_5%K`6U&ToS}qfa4dnD7W>rib$?s;COEDZV4tft?XllzK@n za58lClDdG;#hvh$mmOcg} zP4|HNe9yJ=E2uiYR#i(BT9UqG7-RprAE;Iq$WNvsP*1A?S&xW9kTYRrQ&+H5vaAF& zv^ic~#4c-@&I>12AQpa-aeB$^jB)F|R(MvIN)NHWU#%b3(7z@s3&d%w?e$7e6D@rpkw&qiL`R1QTQIrrQgEeG zVlPDRzzQ(pG&lN5{>A^(zey;&a{s2k`8c2d@fDXvzwDRD+C8`;+eGw%o11X;-tYpm z7;Mk=r!5Un1^w7NkWb>!(vTrG2=y*j7N_vZA|b4K?a#2*YRF-2l zTS8ihLD!YIwP_Gg5i;+6zxTsrrx|Bl@<_S9sUtxuVD`J+fFZwQpsieb>Xo#1%R!11 z(ATN9r`Nh!(X`Ne@cSe%Xk#mZPK=Cd_Sb;!JyROWEJy%yCi7K-WkB^u-S4&1w&=WP z3y(e)cqpfpeYEjAR^%%lQSZb8#8~Y@fcho^y2MMtWkI1&kwXA?S`WJ?x4)Whd8|CU zJamA%e2P>R24?n)W7O9WsIs7il6HEk)B50FZ5Wk>p5hb3YK4sW+QDEA*$@#O=rehJ zt3HZj+jB^isL~L#`HJp!%mbR^zkEIT9KD_H98=bj48}(K=SxA`!|!&k&tyy*`=c5X z1!g=)*wd=oycFMYQsOeC3o%v%iIcis(z>!oR-9WSLUuFPDDI5@-+x&;b1>&rcc8&y z{7&Jp{~x4l>GyOixWYj9w>Qn_N%b%BMjbljW`~oLTHnid?}z7qRLh$HZf~Le3oj>0 z_N;LdmhXQS?5D;i{r$Y7Ei#C|;vSt;P6ur`clmPJkydp%qbaNm09uPpBHA=uN=FCA z>tomifQ1iKHX@C$qed6$z5R%FPB;(|ldVdYB!H@YzoG_irPqNSYI0fMXYP7+&!c3{C1>O61k!Xzhu5i*1K8}Z4onXUM z0Tj%do_54GY5UC3MwI1xNsd1lTvf!UGu8b@*!DPO+nnU0B#d}#s?H`IP6KEf`bI5- z%kwze!h;tzbu9dD8a^@H*8Hvf>zS}9EDD2KSLQ3&jtOLrbFsW*_+XsxSr^)aKhs~% 
z$;hDF8xrlM`PbDG1;q}{;8qs|7SHSG5dm<#ztBW=6nIV`9J36bJ*AAM-40ijg#qWD zVz9YF*^IBJC}GEVvC(5D3X^WTDvk5vf6>2Ne}^v}gMcHU!prw7VhX!1NLlb@KAx2Z zCb`w2Nu20nrd2p?+IF) zg>&R1LybFzFchUbbe2S$#GJ2zEhdHz;8u}CSW!A@vDuE_G=kJ*=31ypO&!<#Cj{36 zzp!A+x7XvOmIHTcel~x#IYC5THgAQhRvI$T+eOQ6!z+T7XOrC{BPL=r6U!lNUNJ%i zbMH52k8r!Qp#}t;lt&U631add(QTVl|IWZ~vB`?*_$LS*7#Yupq4B)ToJeHSbd(y< z1?9++Oiqn6a*$kzx~@*seV^zqpHuiN2)(X$O`ErLq%GjQpiVO_wXQt0EgFbz(07_X zZtw^y;HihW4FgLvX~`IA6DLp!)cwi?(*8}EPdcEwfIS(QOfj8wxn^zy&pgVso^tui0TDuv6Q+&rFWSz3e3>3oELi?Lu)4&R(Cn&*I^r$ zd8SbLRzzgL4=)!pB%%h88$Gm>{7H)Et4y;n)1wj^L$Ig7PgJ`VNKNNb4^rK>f|D&l zuzg5Wo$pmXf%M+&0xyBh|Fl3X2y-947Ps>tsa@-josQ$|`N3VAoWJ2$O6>Ahesiw= zp-k9+YU0LZ5UjlwaQE|d40z$AP%>@EaEqrUXFw^1&?-RqXZvTNhY@=Z76~*z!<4F= zZLHwUNa4-GIPgCRQB!-evVbDnB)nw?P)Ik58#lNs6LzNM7d^p}ixO9QA&HZX4Lvtd zc5(I-jHzJaJQUylefsQEqp3CbAO$gh;_@{{93s%M=VFW4*U+r3X2ZMFCIA)(x$dUX ziq0ezAX3jS9rPUwTDxS|KLDK?imVkgfpD+6T9(~&;KF$;-=BO^@qT8^$_}ivI2Bga zR3yH%FWyA|WnE^A${?z&JcxJ{r0tJD3AWLR;hk;e&7uvd#=W{i_9flpjj{@&2`p5F z#NnEo401^hPgR@_rv zia9vjvdQ>A7nEm8$Vu~_TPFT+mcw5ZC6ti2QU=;S&^w#(@^FY*v@-7B;zp~0O{y{w z{aNnOxNpDCB07FRCWLC2d6sp`i#R8^&XD@Zv^E=RYE&mnaDK2IR?NBF97M@?6EPk) zjMpYLG_a3OPA02rV`}O0L$`Fz*I|(koV1?4xK-{4!Cj{O0KHTrljyP-P3KTPg6=C~ z=r2pTKSTBjR}2@fg;!P1NQNmCT+!awRQ>o{COb|_viHHig5{)A#cUdZ%Z|oi&^-8e zIdh9`?88~JVI|HF8t#%XzYjd8swb zXR}pnMjY52G+1Bw5_sJ4IqzRf%VB1z6QSISL`l_sLG;UACP3OarZMxtlAps9^hHLM zt0HjDVt<(Lf+s#dcd?p-$2C?9w#oPsyVef;T2YeQJCVr8Swxi#2YkWf_z?NTvk+;i zVNdt*DeBP#1ZWX5%sC3P2HDgb;_1_eh1h~#tDu-8Ah*k(^V3nxcmQYj&CSl28{MY~ z;dp<}T|#sG=MvSvu<3nxPS+k(2PngdvuPjx!ty?LNIp25k5f?!H_Jl*UA8bZl(l+% zW)1Zywh>sf{vqX2cLInzMED23`af?XV9^X346$u&zJ-N7BN4vv@B4p6A_T*FY>_1T6X|tdEpYI>56cCK4wmgwzkin{fL-peoYbwNH>6Pqc`Kr2Tf?70GU+#Iy8{D| zU18awoY_m_d+o%Pz9K|;F%F<|cD+^Gu7UZhipL?xMz zBBBqZ*Lci_o?u?29qk$JkcfDl+AsA8(A(l8v+GD9u*=!88)QW;)7&pD9y>cYU?0<5 zc__gOWO4nXnKH^$1>hS6vIzGT`6qFdiF9$5iFLUR{G=<=sL-lO9pkDj_7;!z6+Ynb zx}D^)pjX)Ne|W}gfY0{O*3#z`@yOe?H~mpJJ8e+TP+8{-8yy4s+_vER4hFoR)ZKi! 
z$!%0hB~PH(51d`GNjIRv8{))FpiSiYWe21Anb1!AY=ZRk;4d#HXMVO)f3v7%WHXb) z=Q04%T^uLippGpve+O|_|C3Sb;%kLnFGG$3-^qa4&y~lfj;A5#QN;_|SZfd$crp!_ zOD|0v(BAVEWR;=eliz;CAOm&WxS@VV9QM=hpQ_KDWr{tUT`0Mqm~Nc;5jMmVUNB9G zRi?0C&DUj48GBS@Dhc{IN=}GazVvv~SXL!@nhy2Xt(0j2HesBuyJPV`eJyZ^u0&7I zKi%FUCrhn*@(XQ)h2{!S>WHo?!-Xc~cjci&V$hPuMEp-c1uAHmF8O{B$>2?8#cEo= z4G*XDX~Ddl#H@dAH&cHdM$MFk=IJ{lPzw1J7@F~&K60Mt50>=)@&HCh7*l?VkC{Wd zPZ5FqLG(-v!3I}BE9(?mYQK~v()%VK^HMOr_=M z8do!W2L+K38F_02Ke207yE$gKGVdThpfMbA=C?{eWYo02(hnTmATmXv+XH^^Ihyz% z%`9&4SPM=mKc9ym-4(F^7?QIcY^Eh5N;dez6+3=#=&v=}x7LUW0AUPT_NDp5bn3?N z%?i&aNmbf24dh?Zg$d~A%7SbDlcD_74G1Dooaw;up zt8`7$hj658M4< z&}lTKe(5G3ILLD%)))B;d_bGc?-Qd>=QLXY9(KW(m8a5Rkjvih4J}CUvVt9Xk%b~? z5Z&~PF}Nd8zm2k+`_19#^2CONaSOIg?;^PUjt75NI(6)ZQ<{_WCLN7Xa#7;JrF_oR zj7i6(B`0=qEl_|uEGb(5xv7QR4*-3yf;#SQ%ypW~gB)FmYb)tklc6(UP-#qSP!t}u zjKYgn={CKS?^{8vtjckq4^OYl&d2@t$F1dZB)B&rB}=VsOS+j<|NMd!K!J5g>@%RM zyZAZV0(R+i%jwm@LPV7O>?ESNyhSZ5fn`;TGK{+T=B|@x{HgyVzw;DO^=oB~wkTo7 z2MIzfoq~=6Qd8wCwwJJvLmAjq+y9^^G{E}3Vbn*Zy=^inR7rt^`7O9)J8%r{;sC1eN6|pNe5fA03 z4cW4D9z%jCDJ6d(k#K9ve(35WSd)NO=JChm zrhL4`@U@DX_T%*HkHMO#Z-NM0qOV(~d;Co@7p2H#D!!rlV?uRH&5pgMk#N7|lP#Hx zrJCiaz)Ml6G|iED(aM4WZz>Eu{G;}ecO+6P%buR1BB4?rh4)!jqlDzGrr+doS@l?Kmupj<^1E2KKDO=B`V;c@eu=rHLH*f! 
ziL%ED8PGiw7-9ly4|scN4sLVp<83~<@l4TSyb{z znBU8Hx~0Pca$1okBPHvAI<&pI@FXdRN_}>bzdY3gUuAxE+&+yve+*j5&0988OM8$} zeUUPLWsBNZ@(%_ohQRhGdzHb@PDCBt?oVY4naSh+nRmjI&G-N8LuM#3>_IL&$7 z6UslhdC{*zFccp1e|5!f8jz-#Mh#i&-u>BRHDIV1OGbvk@JTmC3zZo0Qws2H&>Y%y zmr-_5F13ErT8OV>`P%tIGJRB$XDf(W)@bBL@^=5S`y=J3Uc!-V;G8yRL62d`lh~`5 zCGnY=)av%hz__tzy-c;xTp)vjKk9YkEt`CAiaHE{aO^C@=HB9D7bXdLNZ4)( zjr}2Z873GK|6iej7hU>KJLhx$S2zo{I~3An2fNtml?$Pk?jpG^;G6|PT`k0G<=NRc zHAX&-DgJv2oletcC9b6^F<6@hg)FxDzoE#vdN6mkXPgAn0%aIMq1g4t9tvSXQJ2?# zo{o=V&9l+A;rnmzLmHc1fb%tvlr+0G`Zk$-Eak+Ef7fg!&@`x9DcQg;9WJOrj?ht3 zPa(~+%EUuv_WtJ$)N`L5r^ULRG<6zV8k^pJp)sF_dVaw3OP3E&okgeb{URQ_--pZ@7wY zrUW3u7?JswE_bzaejt^Wp+|0v_lK9)QT3;s-(u6jYbPhyj_F&yt6uXrYT6vf;F1Xq z#DKT!2t$|O!kGTGIfGB8=_%r5#1IP{wQ0A@S%O4~Bsuxh$qWd5=!(i|!MW36ceHxEC~QLrUlvH@6~lji2C8i^1GFXhyj|=DznIxipoX9OY*}; z#GA1%q5fmR+9J`gefc;MBoWEQkE68Tt}*KoDa^*XJN*wct<2wb=|E!MNs2T-BnS#` z(D&lIVy0dRWzeVFcpy`$M`H%Xa`KTHJI+Xw8O&u3*DRl3#%sBzDh?Q_?fyLGrD#r? zY$I^g4;6Ti9dtStXgZ6KG@dwOwvkMgoJj~FGOQX2n1QfrUNl%^%(_{HmlB+t zb*%pTgKG_j(J1QZ{U*i;x!-vaa)#b5MMqy#P=&nB9!D$97aNL`BJW>a zuGq?D)mJl|q}ml$DJ`VtV$jQ3iArlHzlo_ni{B0m5O|B%C2k!=Ud*+`j@;ld_9C0h z#7b40Zx~&UT3vPl3QdW(>sX^xzlTm%CjhE?04gu#1NoG`a$>WWKl7tPZbbB1($2^Y znYJL9`~J;zYqR-5zD(Au{dTt)4aRmmXb7A8HSRRm?pGm0c!x3rVOcpS!iKNzAo+2kwNt8X~ZPnz0lum{oa~& zWR3SG$NtzV&8?gZCe-%WssZnT-YDU!+Mt#j42{wbnP!+UMB0E$5;qFBjcW9qE(wk% zJ34b?`&XzA_XXXKT~PhX{bLVD|7w2&`;%4rwjSN;Y4DwMD?2T%q~+~-9phtnc%%53 zyhpP_U^@uis<*|TMdD+fJYJKjgSmQ^WyZ1~f;H82&iw3iXsmVGKV^t5ym=?zB634> z3g}q;wHn%D&2=CmSXbAVgzxB=%faok)M$=mWsz#~R~^9}%4hg_sC4lEnYwjKkBJR{ z+I6jGg?lH1ARp<^z0i*u`3K1yx*cOPAElZF>lHQFt>+x%)~j!Hl*{QB(OPo-`!qKT z2FrluG-t7Zs=J^fDA8VM^nhQXKa|f4I+UVfv{Ct6@G<>L;wfo(^eSlfM@yPlQqE?7 zBB}ufI@Oxj8vMp_pC8-O#iMiv^-Kes$~eNbJp0lcAl+RxVk4*QFc3iF18YUinzq`Q z1A*~E??!x2g_ms3MwA=zd-E4mJ?aej0`HfDBP+GXY zSy&#(<}NHb508`{>>|O6Hl)RJox1HmXHTb#qg^n-dDtN~jRx#z6RhI-$n*8xbqD zZVTx)o2%XoqKVLAK_-qMp5h>?jS3Jf{&t9h%Q0~w=)vA}q-JLEgBY=#KxX1&c!B=%Ghxw 
z^5p!jQ_EhoyWrYY2+~*emc}YsW9B?l85dJU*>Q8IWjvHkapCPnEO9c5H!PFOU>%a7t3HGm8z%@?tmi(RR?6aP zLOSba8PG>DAX5KDfEFbo*w_r4H2qf7i?9q7}HwBg9A}gaBGr>Hv=HI0*PRX`#&I4Uaw+XvCN2LS!_@Xk+jnPCP{+} z1ros0TrAilU0}Nst@S*+8oYqHpDuhMHWgQ=>(X>&70I5n`- z{eLWgl6|qGa64(~1Z_Q{vIMI7=H!DaMhCUNdXs{|3lL^2UPZbZ)bb5 zZQD0nla0w!P1aO5O`L2SlUYJ3Bm(5D}{ zP)-BAHVX#&p+tfXKgJ2r?c#kgiqWJ^8K6vyz64P|I2@IJrEN)>sR1#R61)6j4uLYB zvr@KL2me`B0P8!hhC9C-`aiPU1|CmqJFN88>KHAlL3~LHC(rK4a?|_$C4xDN&j9#w zZ0G%Y--46(;%=v?562A6YY$TZ&A8`srW=`Izu}GolIt8CGI=+!k{-g`l0l`pP~Uin zh^{N+T3ruDUQmh5;vHjzAGVzIf@}kw7E6?gA|s#WMs=8 z5jKTV)(L7MXyiK#UOcL4;olIH;Opa;2vZJA*B9U@Q_x*T}t@oIa(X00g^nDsBCH(V|xKdDTVWy?{D2Xo7e!WrF z!73;`%2Zd+lg3~bTOXNlsULN9t7I-JOYHmu3wZDfcGXfJ&R1W5ZdH>&`Gi0$#?Jly zrAIWup#rA+iEOsu$-G~!I8>wQ$2k>g8eZP3(B<+GhgTDSi@4yPu?!qIFyJ{fjwHmn#{Z{hxJ{((m7sh*w`FTld>CEnl0td2S8h2fE$c(F9@Z zt-xeCqQ_GtNT1t*=1R5;LD2z9dQIP}iX1?)ypD!!VZ=UdC~fcG1b&SLnqt(7V{~co zQZwmxp&OcWbdK6W-v$2qG(3)WEQlT!u-Nth310(wz_<4X7f}+eLa#$4bdWL4&S;8= zym;bzk*i&-%VC=nl$^No5H;2+W;6>glgIl2S*oaF%evl17E%#luh*>~3bGEj?|191 zTz1`WaL)Z=9#z&>hTadn&m5xhAsL%&OMNrsRFM}js2}2Vj`&hz+$m90P_Vgdor@YW zILdh8SqQ@5zW9QUVG5s{bL@WNYCVZEI6SI^jwKC7-pEBE1ry(xHcr9vI^#F#4A6sC z*+k<@bM6ivtn*Q4L|$&3Y!|4F<4~X|QSmdj)$7v?o6VGE5WNSNl;q*fDi-ph*KTuP z_rT&v5a24YBmD5$<*@a;>UCLwJ!FjEZEmvJ7!Dgs!nX0VFQAAuB?}NA3`Mo$u3Ao8 z(P;TfEC`RqU4wFloR)KlM2&T~3ZOe6hlrYP&A$wSm7MlnliTHesbbQ%MV`^3`8q@MXgO6?l*kgeHkT)n{*Sn zfhp65#?nkvi0Sn-{lFrEjzIl>3&Uv}5d6;KV_A*hG)aLGBQ}HRUpo=sa7UQGz{lq2 z_lK-f1XdPBaq~zLi^gvqQ2#|El-m)NgIo~0+AKs6rH=wtpf!jIMHofJj9DYKfVk;2%iz{ujYTE;=wc^6(0VG9V;|?p_qB^_v35SW%08b7v6jFM?IPn zo#=8h-%A>ms;YuJbZtUTXSEv1n&D>U>esGq6b*M;cv#5on(0QShh!Nw6od%W>&owh zJD3)7C0Q|I;{_3rR{3mY1F%t5_;w(7Z*GX>&#ryTmsi^}LbdXkSpcICS`3N>cUWeV+$P2Mg_5L^k0YNyw;e?g1b%AMPJ!38lr(eXB3*P?m!Fgu5mOPp z#7Y;mEvVFOR9mRhV@ae_O4@fk$;PE(d1SBDBgq5&Itk7uz zqzyhfd3f5O_t$}`aj|v@L)Oo8?(7Rr8Hz+mYjBJ8O80h^uv& z7zZXO8(LnP^6qmQ*LbLVt)X6vU&@-x>iNxEgQ`5$Qs!_dLVSU&fAM;}(k%m8GgDRv z$;Yr=VWO#Hu?1-Yv>TSUcSK(qi?(-+=p7UvA$rTngSn3>yz~T6fz!(m4C6lx2EJni 
z!->RY){Xpe9wI4SG^YE5B6Ntc!~<2yL_K+|8;cjf3Z`vx`JITfLKsnPjUVa&4b)#` zkluA|CWu*(KQkv+QCVzt%h;@f&`4R%W43^*H`7_5cGPD}Cs9V)3qc)e|LY;QquSRX z0&UQ{L*U>-4=F21EiJen@>o|@58@)}Hjk;izKu^{OER|;JRsCn85ogXF-*jW6;9xX zYuKXsl#&BDS8ttUi|G=bFSusexz2V0-_eO|t8->4#DZ$q?B@oZ>O3r^oVJnJY5^*m zXkGB#c2$-8x z`Ov7Iv@j8${v9UW%S2Q+-BUdu`MM<4gE3rEqvH&}(G@|J_HE4l-?P(0EQz;-KrSj$e28W_ptT;1IW6 z`*NC+8*R$QO+n6_y?pOTFu;p(9-gb`iJIM5n5JKpy7Q&d7G~}?3OcYZ?}@~Akd}HI zWs(S$U3-;Roz^^7G*69iu&>8t*nSs(&hZ`JHd^VQ-}`31?`YCEp)*u77lIf8H`iGSr4-fUe9W|=zX zt&3kll4$@#Wp(Q!gY{&o0`Qb$Zdh?of0ioegv>x4KsndU^3hs)Z#B_GkG&t;@8u>X z6&c*Fv~T)<_;udl9b!^ENM9XAJvgUY;=~GW9k?;LHWGQ}g zt(CW18OKe>iLKSxi}r*&yiFI4$W;;Fs+1m4rQNBazuvO`l3QevY0XnTkc0f2sX+>o znI@S0?tAE+j59Sx57vzzr~8}78}6h4Ok^a4ozaMZw)Qw`y}?MouW!hS`(F9n>u^#x zts)(Lm9SHP$j!?snK*pFR2lXj2gE2$Z7h;2{?Hcsj3nrns(zj4LmGLFonr?oD zNx{>vja0ECrWy7W!gfhpdIl3f8-W$Qx_CJBPq_~l>zM)?kKhc#=)nZ|GecZFt~z6sYp+VONMnXJN%gC zbc9RtYH1}3YW@UB9CMH?7^r+=NB6R=CFK&m=(yV|XTC`yF5R$U!bA zIk>IFv_jkjs!N~@pptncF(csj-RA2LREc~^G!*ZxN^GrSeeZ$IAKyqFmTqlTQ20xY z{K`KQ8ZqWrMxj>!!$iO}ty#=+U$|RSH6@IclwycvUZO-Lll2_UhGNm^#qs&=0|*zK zH#)q*iCsUq<#zo^I2_#vu@>Pj%!V0`glIpAW9j|d--7P_a=YqyML1&?csKi?Kl|>6 zGFJ?w+=o0ZBroJXh6UCh)o#&5e1EK@*6-di zd4rl|`YOylc?q5v>SO}m;@j|K`e(`JQ?(|)X6JZH7_x|z45pqCm^BUB5|`{34-s>) zxurC3JOPm~fXGAqDYOw(0!gNVQG`_!RATK2%76Yx!+CKse50CZNsNdgO9e1;Wy32I&*G*1D$`!4|!zohv;mESmaY~3|T4} z=iskWl?d^E@*g!mO8B4=FXd0Mdn{XO0W+HTF*rQVanuN|P&$DRtw(TxPMDsKQ(WE^ zQjSNQL;`iN{uZ(D5nlOQ@9*A)62!BIFkAiaO2`q%?dk9`4>x9|B62FbwmG-vyGZAm zOo(ng;_S+b%x4Q7KYh`*Kks2bkQV^vOc(B`wgyio$Vi#lCVL&_OGbsxysvm(X}W#X zsPU-3V$$pN#i~FuMYFSq$uLS8PHTh&DMZaXc!o#4zEcG>8#RkJ6mvG}$DYsVm5dwH z%BSJw(c0xVqVM=2^wi=aHV-#fr>4q?vab6)CZiHil5n^Okknbu+_)bdE^*|)Z%sqh zv%uDPn`E4a{>ZJC{)wA^3(gcHKuaNOBF5cTd=}baQ#Qq!xVhbphid$U5%Jbf^!bJ~ zSSA6p5qcbX71CBO+-A3xj^H*8x}=4UHHmZ%Y4USoD?3A`@uj>(FS{?>?bmSypkCVJ zTX&}%mbfP~Y6Y#shgQ)88}U8tU1c}Thh(b~Y~-c$-Y35=_yaMU$g-?mB=(>!Ldy58 zmiy6Nic{t5TJC>#BPKxS^(TgZSV{2?)8se@{F3`2+#w8WoJ3|%rz1WaUf_NGR8fg0`03kn^mSmR9>0OP^_NYv6VmHw 
z3pVVSt3|uvFE4d{FB|#Bz5vb>5(}t~w>>V&u7y8gPA{j5vLfqKq!eOH*fK%72yxHK zqy2*THnFoYC1TQ)Z!2nY2~{`81g;m}t8V9ISa$R%h^e{cYA7WAH|Hu zb7n`sr)Xj)F(JKnXJn6uOqQ07=K$vPn`~ldgywtcn`^Il+!!`wl3rEwO>VDtfGX|i zVL};ItT5-@SHgSwp%qIF2ow?eePnZqv&!Y(76cv)Ire#a6z=$r83gOpW!T$HAsuxD zdeF+x{O`NtjOP=NK3^=rI5lZX26m2%8_RgA{pWfr$tbXM{8C z?uLcloPSgnulfb$a7+3SPhiX)c6g)MuMg2^rvk0BSxb$)@`;^Syl141c8Y@bAR41eS3=TSp4oV1gHE|u27uDbv_}n*?`?%OPRzM zR6y?}UMt@ZA>0j_tuv)51m~AQfns79_AS_q2P!c_U6!zQadKB25%aleBg59DshOdo42bM3D{Yn<>DP}a%D&@1CL_nCq;%{~urh&}+Do#x z8zl1GGEeDrF)y~mrHmd-z89^BdnXk&3(-FIOq+7UEtqd_p%~*@7&}Y+8Q9lS^&6jR zJftKx#=esx&AZeDo;MD|_Pt1+)8+tv8R4a*Y)<(*I6piY>H?8&nW)9F)MA5nC|&}D zBNytpS&95UzbD~xo?RbuYvZQ8yHtG^K1Yc=>eDgV_L&k*&X4Ojk4WvBOAQl=*zE(m zj{|&XyNr&o{=qrcQ#%F4B)EZ7*Rn>O2F_*_h5Ki9@zSF#3M2!;w@+W?urjZv8);W_ zMF-L+Mt4m*w_r4J9$Fv5!uuEQ2=KDPo?px)Xj+2nT8DgU*4Sc&?~{sxv*pCLS~a@B zk~S%l>Q+A0PDoHUi2!{b%4V9gs!DL3;zyH_C6@|Ff-Lt-siI6SFQST%O9*$V?$&Ft zav5W38Se1Dy=&4aQJQh3j(&;WI?Roi26~bnsO3PRwdlhld9@LRiH%SOXo2wQO|d7? z5a6Sc_rI_wz_vf@Eip+Yd{sb+fjM(f!(7Ek6fpIfw~R8=`fcHQqRz$w^OP<^f_xCr z$lz5w)pul4%IvH>mg2S%g!N#x;Rgg9$wP6gVS})32QHK6KE>f5_Ox>UT07M@ZlWSt za|<(Tr9g+I3KyOegC``UW_F2XcoQ>^n>oq4+Ce>6Ql!- zylNk|M^DqCz{Ngw!>6u|310z&z;JhcR1^-}>Z;Kcdsd^9_<(@Nrq0QT?tI5s3sQ_)Wz6m; z3mTZh`njP8=#MBWyVatN4)nW>BU$riXqE0`e= zr-)xsb$=bmpGEOOtjN8VBaH}oh8N%KcD z0B_N$%Ac0^G@?r1gp~C9N7i>YpL7oM65{FR=*F+haHks;$8sMMN08%2!WuE~Go>y{ zXrkKE2#>b5hKvCiICb1UT6caZN(HLc*44*==e8{qOk`a}^cw5zAH ztadYQpRE4Ys~0Prs4kb>M`LOiiHxem%UK5U7M%Rjyc7Y4JB z;b@$855(zw{xQr(^=di5eFyKI<)LD&IYB6C|Bgw6Ph(o692nRAqIuQ9$w+(CNPB9x z*bxBg6Ssf$!a4tHIUd(NbH8m!_BoCRMyqj813`7dPO*?}U}Y-v;`Te$F4L=siTbbr zlb`TNK=GU5G!KB`DTgb+)2o6{L1B;RTLsV zcS+Fba>?Li&giZxfT}L|G9inRW&Z*`RB;gvwYSb(^_$Bv#^1rf+UY+$rNm2-y}Vl) z9+h~%5xN^&->QuS{I*-I&3+tBOQOT}YZ{5@_A+OM zESp$kId!#LO=P$%nS~)D`a?ARL0Mg|h44(jbtYXaINqxmYF zUh8+sK&R|BQbzd3OJ=(JW2y;_ObTCR`Cco&zj=SjF6fi$rg|H2Ka03AV~}=eUZ(S5 zJ-DAPjMNR0G8r`6#}EF*8_UJui6__+?v8F#{Mp|q3<8sgxjb!$A$KnpNdIIP`9#;0 
z@7DT#kkk7)*m?H!cUL_PizF1rD}2;XIA~M5FX{fu6%adKF*TN(ibNV$-cL=^4YeL* zKr7+yhO3mE5BqKAsP)@3bUcvo8^PrShrQX~dTQ?UfjlW^&)h1U5 zAZ359?|H7l4w;N};f>&%_2tzN12geEGl@ST5D>bY%*0&z{Ji^o`(+Vy7`6NOxVhQ{ zF!Bec$B5jPZa*G20b+{f{kysR{lA0kLhm;dx99eAuI5L`mb^{Z^NJGw95B+DlD9ck z6Z;H9s-p%^oTn|Im^vPnOk~*lKhBP^jajA|l|+tFyDzZ4-eR*Xf^S>U&c1)wbk{2T z&aS|#?}pLAAIo|Q7S#oZ$vB2?V6?znYrXG2qxUU|qw076a)xN4pgDygb_(O!X2z5h2{@IArVh%YG`d(R6RmjT%7lY-jV# zTN;P?o`f!LAra5lBotzPHQ7~}tMd+ktD=PRL9qUNMPBbF?m_%WmgZn&bmO^gf^I7# z;V|y;T2olZ(?Pp>(>LhlnCz4Rv6dr1F+MQQn|U%A!?$SScI<%uMLJ5Mre>> zV$YHg+9cDPPCn*G;bsB??ezLtKZho3uD$hcj@}=K71^ah;(%XlXZ6>Z1JUzAEsc-lQVby(&F@HUDD6vZ{eaH6E~_^9DS|OT9&3sb=wimVJ&UF5V<#3-!|*N>^ClL2?=391a#T!=w4oAhlsFM3In^r zvAkSy>*EuxwPr&X5FQ9+>!%&mw z(sCcR$A(eb)#3Idh`T>GWb?KE;+W@^qVjM{jik_HZEI_bid5fEzOMCVp@MBxgbzL8 zC3-2hY8q6ImXc6o)Q*dE9Blb?wT-X)rA4oh!OPk9Zp24_!zctCKl86GWL@I!m>mD$ z3?hQ2RF0zHn8|j7J$-M;3(4(~hf>0!)~aGXH%$w>V1K&5^pA|x;qNP@X^;MRqe8}G zO$sCRUdwGX)~QyP)Ni&4a=W2sVj^#_0BazHR|Mg)AQ2bo!SKb5E}DbXV-q%%HC4pMzCSRt2lRwmvb;V z+u*c4<(X{dr3?OgN9lIZwM}_Catyuo9wYim)Sy|}p^v$-PC@QkV?^Z-tTs$HgD%hG z;?%EX_Jw}Q#sy?QwRHwC0W$}fnp;R4A;Ec3n6;ntoDbdDZlY?(>+kpbO+DI|w!Z4S*h}SycT-X{XJcyy5pjv$)ACrSEVq z88PYN#?Ya!mF?3zOEo~r42Zk=!Th$;rz?8a`qB9?{>B{jVgHPbZs+`F{ioqkpAhtV zWfxla1Uk*`oSSVt>}cWk<~{WP27Fli5cRRzez!dzo!YrRQ(g3TGrB#c)_~%DIcrj5HH8k$oA%8&Cf5yJ{BZd4sfFt4wD$X~TExFVr(MR(3s! 
z6?sTobDL8Zin=RIf4YPQ!wsjU%`WRX!=H%1mATeT$^GecZ$ycJCbF4MNnupj8b?sr zdTQ!HnSLRfBkvuy=HwUr(F3=7$7)n>m=Ywk+W1VCF6^E025+|&d+wUi@p{~@PGFK^ z4&sj_YJ_8_U?WSJ@@ zmU$f5L(6z`zgLcC3X8R?^z#{XkS)|9+ha>^MKORL>&EckU>9bHWq{7yYbuQ#eL9V{ zExCmfFw}^t5DQX4M?@Uf3c^QGC|8w01*bIJoS9&Qx3A;_hpDJ-S(WE%y#3`6)JDa| zJoXp6oj=zM__tg5pX(+{-6sDC%rcTnzo?2n7p-~iMrWi~RJSQy9+M%0L#%B$aK6<+ zH`|$xoj8=B#@rr(MnaxbIm1C1z3%VDDcdH8S!@m~o^jYmu-r=DnE%?%Cke{|?_4EL zBsazd!U80FDAckLz|;en$PAngmuv=YN(&OP_A^=OHw&wd;8JW{Es5$7?>AI@3ui%2 zIz9tJJ{8EBN4N3c-^;);4@-&ssn~Py@@aQ7E2P;}szAEMlN;f}aJOd|zvxq&XrNtR zaj=TN3Xh-tamFM^1o-%#=pK}sa|hgQ9JOH~gdV`ABS!zR49<}&k+8>wi??91QGf|5lB-Yfd75@-+Gb*pWW$cm%LQ97ICVP(cw7dOeEHcMz?=m6&OwkNw`p` z>?RG|b_>i#g9F}v4`cD{SB$Kca(+Lzkm$0x|3>B$ z&j#BnMX5FL)}-3{o2!5p5pT9Xy1*I#7ZqS8yEK|m(Wmj-Aa*K#4a#Mmx*9gRyGH~e z0%+kdue1Vh1su~s<4fBCS=l>6^g?API}Q%S4J4@8lba$hBhwHvgR^}6f@ zx*VeH_1g(i^oUW||0d{Gp2vouL8JfGs&(wCA)?sK=jL#~^g!T;QGkG)MZS|hGk49i zOUjfKX&_$LwVxQ>_x;6IWmMX%pGGrt>AHq^v-ZonSo-Hdcrb}I2Q1NsY^s>bqg{eD z@%7;$$Gm?(6C2%6!9?z0>V{IKgx4?;dPl#jGPr7;8rBr)%EVl{$+#S&70Cv&-hfsW zTo~DoQP?ZlGNH-d*o2Q}lA0$9bFJW5B$sQssn<9{POq|8W_S@d<4w+UkgQM_0)#GT@VVjOdF^ewpDUn;*Ex@_Wn=q~Z07G!tNnZurnFM>(00M`f1(hBuNv z7)qU*p)A3E>9;tq<3@p5^6y(A_--Bo=oIPBq~cY7uXetFYQGN+Sz~hlLeTjKiTy#y z4Y783u@(p&8AM@eR0%{!Bcp|8B_ek}`HPI{C%W}((}VtRz-N$#MPH4%&BFmiw9pRL zDAFsS*r_7JS*gbF;bfV%AB<=nddVl2$2{Net$ zM>^F6@9DgTA&^~YI_q?APsrUYJ4^1NE zfznf1^CHO@AVSl@9-r_Yh2qCXS~X|oHPsy>B~G1Sj3 z*RDD_Nq(qn|13w&wba?fNx_!1<=N+#Ih%k>jN~HLU*Qnyl)1^N|e) z5tZ!tb}+%1yaCJOhI1?m*>i?wfR&f4Ze5`qEh{Tz6ut@e^mF=t+Rwmy6L%_^P8uB} zV2oyd5ySx=4+W=(Q+~&bqZf(q!SsLZ!6n}8vEA9gX53-$=T+A#N!Y+m=wj=}Gzk-WaIXy27*MEP# zJ(zGIElDQi1tYuYny7gmV<1p$krg-imxa>|ni9Gn&P`?uRXB6&9~2^A{#*hi+lE2m zjU4|bbXkR9Za5kyF~5-Z_fw9R(peJ?GLY{^`!nRl z4L#=)n(B{XMdYZC$FI?gb&{uIgHJ!iTolSTxQsq#78U>L*CcLRn>VPY7NMB5E)`#e+e^ED~+VT=XZfQ%dgl7~d0 z(G&piL)?!^r;KIHYASWCrd=|0jR)qBTWu}}4AlJ2;dXShbFUo;219Jt;$~`hzb$`C zK2uxS;$_pd*m&JSuMXvueziH_=aircR$}>ZGfRc#T3GC`iK{ky{d{#I9g9IrJjJ3R 
zwZn=>f`*TK5H%EA|BF+sjw$`*#k5&T*d>YrQpe+IaQG{SG}hPKxfv@!%W3DsiXAdD zM?_K-k*6}ME;sQ6E@6TK?bG<%Q@|sxsQS(24dya}?z_0`FM{IvpE2&0 zxg}ikcUGl+D=75T^5RV1<=*?<@}yo%oj^gliKgeW&pi5HM)mYo6j+=UNKBBXy=2t@kKjFsxOOdP6EtA(i0dW%u#kirC z!n#R$XyhNvZ3oE}<^|XAtlJhgExs${z1J9raL?earqAv?YvFy|aGVgS?78!)BIw95 ziO$$Zwz}g^sZ6VzHJpDF>4KSp)PR;&F_uatsirHFSwtP?(tx>2za`n$q?_qe39~x1 zC?qo%@)nK4(TI_aJ=O<8>C?D1dpGfZTwV_FoP;gP=FXB-KSOlSUrdc>(csu}Ty&DgkPxNst+c)9wsud+kMhMzg6vm@`eKvleFPPNUlYg`Ay#$BK7E!6v zMy3EC!UZ=coV#GZi64A>&IF6iAOObT^)F^rF8Sc??gdVbc_Q|Vi@sH@yci-@f!MP= z<4~`vIiAe-t>oMnrVwyH7o2ur{!DznyVeS}yNWQs82iWR6*X1O7`eLh__IdzkBd+WEQU%*aBGlV==YD=KSjYZwFmQ-TD~oLn7) zo^o6EAsIih+C>)?Ry1w)d{)omnb480`58Giv63ldYCGrjH$|)%g|H6PfK7+4URV_t z8Oc4`_$4nm9*fHn>Lw|XA&htban>J_nz0Zv9b(2$no|4+=cn@Y{CFSatF z)f=Oho!nutS+4(>Aj?3~Cas(Y?vWkJ!I^laeBxh1e|zbH-IohD;qio6(PxGJ z{$DXg#g3_kuSU#OiA+q}Q~Ub9E)c$nIl*)A9oq8H#i+HzNxDv2U#zS$X73>?A-_sR z6Ddi2y1_>fjzY3_1%YGE$(U3!!<=Uf%6wnv>6?Dw#b#s3UnwhK;DorOyZujV&;r%P z!R_mU- z@>nMrdJpv{Xx3qk-)b-Xw$65~o>#f~bF=ZA1W`<43w8Q^`99?|K706&$4@t$pzm;B z@gN@6UrrLL>_cW?p^n$K7W8WvKmDF=J6}+)%pL=4s3uZ z5U``6P7K>eMmbi{j7eRVdS4>NqGShh{eV9z`?-B2>=P8$IC@g~r{DPPg_u_P#w24! z^SA951M6}Ie>$y8pk2;LzJK|B&s1IOLe`yAn_F|ueIt@02L0v6v>&i>7j%Ns!^OZM zZXUw@^miCT2W}d6?&jM(9U)Nescc&n767h0c+Ek}+&(hwK8BK#)H5XgPri9~81#hO z=<45@YoRGZ7yOv%1r6ILWrrF3&{CF{l&E-`T79o>f;qud0G8X4{-#ZAy>o;8eQy&? zcw^9#WQnCF&zcz}^T`lNcP_46mP^BCvu(DPXVZ%PJjC~|MXE?7h7mPMWB_)UBG=^( z+3;56)EkK#m)}7qMPT>>LZ-%W)q6{wj^PBMo)v-^iBFrMOA_^D4JzHBO=lg?3r1~$%67d79E`-eUpH<2JcwC>_ZgFuq z&c|}WzZh;-Bl*W6n97@w+a zo}X$3#Z$?S?hO8HQfl0uGP>kH@tpkMZ2wvGc8R<^?X_wN+?(;jeC|Hz0)0ruYg4_N z*Lc6ZnJ~ws-WCyxbAy(+r;-WF!O;+x)wo_*bvgNO^y#gQ<%h*L{wCeD6GW!Lc&uGv z%D`DK(Dxv3IUEe;9x~AY07IM)9o(;*F}=W&d^gH0C^G5 zW10y%`Mu1eRDiRZR2RH$Qs`~$QLNAff0<5A&NrSB2cMd@6UO89cF*B0RolI(&bj}l zPDvJ6fC*Grn>C%D(4|?QpAXy01=Tg}w^caCHCB1MOhKzMKq+T(r|7$%n2CrYdXY9h za#^gGRMmgGcku&VFD4dwPQ_sToUxC&D3^6~oWDGW3$VMvE!q-S{=i?lFcJ2QTyuI? 
z&X5U+f;Y@foJ#pxbmNWv8Vo6heNaUn!53PP$F?h5zbg!g0Hj_zSAT%<^>FNU%JqQf zO^ZbaE#JVIW*k)W(SUeE(p0|xjQ`RHSkrEt_o|Rw`HvG?`L7_w0XFA!)HGx3p6DU# zZ=QQgg>7RA#bf(5V0ygSB6&{d8xu~~1!lb#a3tUF_%El~0jEG0UlP;^w_FM1rL*Rw zf$D&2h2QUyv4pd6hlC~(WsqKzf~c=gBXM-2f3~nHq1mV0s!SaUwV=Br?;rGhlBoVS zh*z-f`Q1;~0)F|IqJpgQ(+=yP?J%WBX%&A&Z^q^7DK$m{FDbJ`^q~@cT9I3Huvc{yWm$)gX;PmixXULvRcF zuobjG%P69z`c_G;Rb3$Uk@d4W;dW9;aimoNA;|8YMKLPPKij_;QRaFo-8zSX`YpRh zsuT2b>H;qPmjm}WL6qvqG~AO7O1tTK{&zook;)qg?JN@FFieE+BMtBabr?o%H8}6> z?GP5e6!&&TkZ zKm(Y;vAEs|BrLE$&jrRp0>*9kUnaKw-Azi*L+H;{V6Wr!u5Wz+$LR7F z-iGm=j>-DdRK(I@dT~boQ|AYyKtHVeOOUdrR3WN&)TVf}{15x0ax6&N9tKFk;X@Hv zaSY#V+B|1+<_D@d^6Lc6s(chex9lQrJDT&Aruc=bRk`f8`y!cSYNL91Lb_`U^I-@5mW3reN`FR6V&pt5k@R&57QPKYXq@Rc>BnKi9UHntK0Ep8F)o}ZNeol`ug<9W_ zTyB2)@uHvNb^b4*Fl*88YmDqZELgu#^gv zX8huI!j4=;XV68(@r4IaFaNUDV6kGTphwoWuA(cwh8_t?ayXFg4>@zIq5=tTy}?A( z$Y*P*IXR>WwMk@BhVuXC2k9Ur@o!K0dx!rx^a-iv&%G>r@YcS+g6NHAyW%}CGgUyk zs(|;O|B2pl3PqaZsrOM#zlr^9)87p)Ybn1PY6Y42zyHMq)f+<^KL58FlxjCWOu|mb zwT>v01~Hg=kWuF4K_Y#znOl;qH@B{kIee1WS16zaj;o7SKPo&qeGX26`K{&^=s!L!lIWOCdj#h4)X|WC9tHa+b>jK~ea1NP%cFd6a zKN}VP6-FWFD(t8O(@yA>`hFZ1?Gh1KmOM4$!w7j23CuXV&Z5JqHHjUEFN7a@Q7V4p z*GSqk1Rx4}UbKU2w=C{Y{;F5$^R({92w8yRiGn1iV_=|7!Rn&_nT{TCuN{PKiK`__ zAUG=_zxyZJy$k#|{E-BHl~q&1KyO>-okDIcfoFG5fn+-c1F<8$G5 zz0X+q@r`_1XI(*it|7M~#{_peS0Vy6Xns>Xux;)NTIPI6%Cse>H?pEy&#_$z9d1QJ? 
z_5iNG{@(%lpxi?E01dVeY+6ZWgA>BSYN03ulwho=na3CcE*TsMWz51vrqhPlfkm z=hU0jZ`hBw*bD+0^?T|L5JIfG6co@b-)Lq7>CX z^1i0?L4P|ucM%zvInJ@`xdbI;1!v^{Oau)0`;Rci^vLErpx?jwjW%=|tdza?;$`Q5 zp`*TCWytv9$0!96Jj;5EEL7#GQa3--C-0v5pu!X)7P%SfC1sm0Df z#0~h$m@$YDMFi+T@gM%Ta7!}qKx0pQ&7b|i*!%mDOx4q~E*t*SdE({gU|=cN|C0d_ zEURu}Upp-Sm?JP7l~n6#b-TqvrJnvt80mMpU+|`*0s9b0I#U1N!d_thds+)J=EAls zHKxQDtZwc`8$&*PQ>xhD=*GVt|NjGn_!h7djxs;zg59=q29s}_J||_RqhLg4w|U1% zAn)O2TA+)%fFTEX3jq}$;>nd|oVUmpk?B;)&*RnWsOti&))R!ZEn z7kTu*rH#Tu#ayKZ4=g;K{~s2>ls(LvbJRs7Q022YOuEjPSVv^-VORA?cvSc2pOe>~sj&O#{ZW1T-$;(Mg%Xg*S-lw@v&n)9?RQ z1KuXFbnzZc5a%s}r!fZ6H5W_`*w^0`Ik}657cE}bUo4T{#``WbJSqND8ghNGAOQMZ^P+om z;a%D3Ndy=9+8^L9VwT&(sYO2FkGI?Jw82~9-4fft|HhH{H_pAlV>SEo@^V5_5;fnN zQ?Sh0T5E$a*nRkiwpcd2i3#fLp9}eyHvvaJ{vD*5t7HZ8SmJkAP1Sh+mT&8OU~XhE z$Y64TW@i=iyg){X!tq77rU{}a&?B>2>N1{A${FRg&gV+3RlpFUnr+2rNr;|pU&^(UTP zj&mJWPs9D6E6Az`{t0LK-!9bOi~KYsq&mVgbP5FLzm)3h>$$5IjeV4-Xenx4<4yJ` z|F;tG<7NL;wXBa!fQ?SPRIb~sKlBD%AU+Dt%@%2tsn%&(p}XPOI6d=jk|hb82pr%$ntJx)kaof9wL$Y**pLS5PS>+W&Hws(LGbT8L0?UxoXZfw znrq9SNvBt%BO{+`uNfgAt;-e9Qy$X-zcsC(tLH#oV~Vt*eOpN^A`MdA?n(_&@DC)r zOB=VKk!38vPfG7E>TDO@ur{m`05NW0V4g!J{8}7kM*Kg)eBLLpO!Hu#oZl1s+ZKQL zo3{Bvke#Zm$I)@f7=UxYWJe!Xqe#;Hk4?%z+9ZCT8vzDB$y>q;$73ImWU~(c^G6M& zQw-F$GbzmWd~jnOU;mpGU(u|1Wz~f#8CV(wCov!=$d?xA=4f<8!zlnk;$VApx>%607?1Ai}1_ zCu-5j&l973;Cwq`41RPbRAsS_z+E}3vI9~@JB*!R@NIlX%buPX$v-pGFE)Ca%i`k{ zco{ht2LF1Fj*cdrR~_X!`KH$aiMc*?Tcc4TrMAHiDAm(g*m>6{>8{vSNYBfWOY|#) zT|XbOl_jzY%mE+OVoe5lb9#`dp(JdwqVh$lMrrD-J0hLKW|rG>;#+z+$x9iXYKt$t zC_7H2sPpV-h!wnU$u^8vUQ%r@I5<{E6@|tw+dr6{M2LAGEO(OF17Pss7VL~aQFM94 zkq33}rm|nr>(tqhkH`+UA=55+48FqHyj!62t?LTjq9KJ(qMt*V+?^F@d&0xOQ%OFwjt!DX9TcOs!Jw`v>Q#W_QOgZ|UTc zse;HHf@N@%^QAE{AMEgxL)QqAOKCG`A<5`7MgsYEY27?il;GvpYO67s_1~Wh*1dW# zCS`oR+@r#DPx0BZ73qBYSAPazsekwbM82h>C6XoPvvkwnh=ZCW=;-K*Y*2`yKvc!?;5$ho`e%`>XXtkv}Jwd{=!adGM@# zA9N}xe*t?ZHt>QNR9AjxhWC(DS5;P;bcR0qu%ALsyS_b9mSozO*aMCW7Hxz8L(1>?5`I`DM)`Vyo6Cd zuFTCk;QAOttE-QSc3&+YoO+(=L7;3Tdo{FxKhSeK|?cBsU8Aw%yb 
zJoJo|%SMlZhG_iBM}UcR+he4`w=N=Uxg+=YNGQ*$@nDmivB5`KQkn3o3;~Z`<0t{T zhx7{Z{y4l+5m(o@S9`p+eFMpFORHFEr4eg*O0aOAYUMkjyX%KKjfkM&rLD1IWSw(7 z-)?KX(Hnrdh|69f>QxwpD&IXaLg0e<|l4oTC;V z{{A~hh4eFxrb`Ez7rFQOlGX1hOJvI^@mg$S@+xT*mouERW>2`?v}FM!3MYe&t1*=|L90X8jiJ}uiDO+#!XV8I*n?28djozh^ScK4I+wqiE`YURcp%{w80GN| z&fOP4d*Ke61|DY|grC2N)Zg3E-_^bl^L6$oxsBpK3?8z;?Yu0pFb(z0-lP>@~S$L7A_Z18W`Ex$gfdikbzja8#$an8WKlIhSq&Jw(vvR$> z2XnOB6iK2mEw1q3%nw_Ip(rfJDJwDr3e*pXIm8gB#N%W1qFw3a&?ZUXgXVz#bmtyq z+ex0FZBdfKJ=f~Yu_cr5Lf^jg(cuqtN(=Pg2ot<;?Zxk48PT*gc6{jYKk;|GQ6m9z zmC~(<{RYd!OveEpk__h_r0~XY5Li&3!3qcJGdze(2jPw5%$wBXH3jcC4jqH30M7oE;@6ZN1p$S5zv`&eDDbMUc18ME#`B@|{miSHR-v~)VHaT4xZZpe$;fc%W#kPITh2^Q{`%o5 z5NpKqI&U$oB*(`ShIaA~5gRXWX<(4JDub8EzIE7-ffokvSh$zxJf+S`zFYrnJ05d~ ztT!JJLfpJZyzJv^ZyJ~y3f`$=e|H<>>f%z{pUf`$>fL146_@8>5B=e9u7#ayDiDCj zfBFP_I$1(5*VodTr}!%dT{E&oyGjCahb7DQ@}+wH@8@-q-*oG~Rol$zq(FVNisMdk z?q0b5#bdBW?yd0_3s_Q@gx|P*Rh((ifT>RPVOPu@nApQIHBs}c zdl7Jt-EMC0Yd2)q`T%CG{+Mo%x{|VG+$snZ#Fc@#tTWWw&em5Dxa?kcHwqgK@QRLp z4}1K4UR%u+Y9ab@1Ww?R(Sv9A;m6)rt=>i#kg_lSecwak-p9SF%8|^Is5AbbncDoW zE*N6Gzz0fRK4x0EFhK(DsT z-92*EeLrI%O>9BpxOx4DvOpec^P6gm7`e}{Mev3xKn6&OCG!Wj=PKDBVT#Ai>X-q<#Mad9IyU(SBwzf{N`xE_GwEO znPYh5b;M9mW*-x4jMle(e8G_QyW2A%cQDm}3w@eu$}}2d7iZ_Nn@;257)0)pjLcy& zy+NG*uI`o;Q8beKmBmd)EXo0)lV6NgUXRedaPY8bUAan8#RzS^~;~0zNz+()SfBC$24~ zVc-cg!M3|45$fZ(R%fz1%p3kU*_>i zsQuNQ`QEwqog6^p6xyG=?q>6e&kep2KxCdWC@)i>ILUv0rJhbOieG+R#rnpf=V{SH z;d_32Ik2{G4{+r>K(wEYr6HZ@$o}O@>;;c*CEm#Xk`~oVM6xAq?sG zit};4Gbw{R852&;=GS&2gXW1D`npeK2j|-nCDzZd@*oS|SEV{pxkBXIWb)FZtSH+w zbDfV3N>r6+-%M{Y4W8mWK;nGmj{tutAAafm@T%0&EJ!X}p@P}&X;ApXH}mc|Vb z2r|=WdRBVVd*kxLLt`%GwG=44rBMplz`CJlVSaFb*q+3f7bR^QFgp1N5Wp!t6}}{X zND}#3!Uc3bkrG2m7~=^N?x6NnPF}wc6YzDu%@u=`d>=moc#34Q(lksk2&#!fUPeL1 ziLV)H(Suh}<)57D|Ao9W?OYU}RjJA4FA9cF_g;p_?)_`vV@Z237-)bOic+B>3xr(t8Sp1kQgC! 
z+`aO}t20nYV-;lGK`EmNu?X8XCSdxtw4^tT$Ce#s>X3@E-iivB2*<$xzPh@KB6bCN zR;=0zHUuAvtml^`cEN+FSJ}2`<%hb&{h8QppdS+n&1qp_Vehpty7*02JD%?_Bi6Sy z*Aq5aP*boHaPUz>xT+adrs-^wH?I!qDW12SXS>vDZ55ac9pF+{B$!`^s<;^0gcD)! z!^&3`*w;5G0v(}vuG<2Vw|I`|hGLQi#-B!7Qqsd>gGR#JJ*%GamtY5?AHDQA-Dzv{ zLw$t$OaR+3QV8WU3bmYJ2}8V>PjhrZlO2W_9@HjMhyY=rmq7@rj*E^>`TcMvu|*G6 zq1Y}>(;HOj|6#)dn(1e}drBm)5G545lK3coqYdwkOu3JK*tXu}|8jDBko_FwBgYU= zE{a;v5}4U95yoT1402U-nHl%Omnp~LMjfay{g%ruX8qJ|8v9*JRRyam-$tDFAY9GS z(Q0X7PvRif>yiv4I0!|0!_dVh%{?Bdw~>gvkIi3e*_&j{h;F6NvlxOmlS?}M+n$>S z^W^MHZzCu7&##XLMa3ev|5`35ZqIMl@1~!tr2Jk^q?gk!le-_?8wuEhR;Kh36udKd zmiAj#zO`?0Lmorq5b5iX1on)z`_GGYcga$amM>+YbBzfb0@oP7aB$_?t(WL}dAAcT zou(jk<8)FLnR=iXR2)rs&VDb%jQ~D9M*l***rVDF{i&dS+(Mr0;~!*o!aXcv`zcE9 z9r4`wTm%kQ7eYtW{i#Lerv{CaD2Fk+`daq&FvX_UK37T(|cgk%9P>lH@ zQYyz3e02BNQpdW}uGW9-d&}KSpe?AJ?w0Iy_(dYXG7_l}F`&p=_o-S8Mc)y^Ya0=I zV1A-*I~yfR1;wL!ya!_0WqiW|>y9AvZMi}(?_b2`76{n~)-G*;k5@1p&KB393uIR8 zuv8SxecMy6_6KgJHt+vLNbDFKYVjl5D34u9;(UjE4PVoTeD&oVz)qNWJ{(FNgsO1J zlUE~b8U}rEGfdyB5C&M>X$L+4kBRTE~`lDeC|PHs8w3f(;qp^&&j{B^Gmcz+XbV! z3a&N_ln%w9*sqW=71#&BQW$u{a=3O2lwiTnx2d)t$rC$9L3szxMorkA73&?Togo__ zczd~R=w2A~tjsaSA9#tFBNjb{QvwJ9yQ;!UN;<>e9w08`y2kKcUVzMbMBtuj35J)GL7enoqUtksP>Z%2s; z&6Pe5iOJkZq7M&UxqA5ANY?%?)91EOTBTAXt;bhnPb}{lwj`aO^8_&Omeh+9*LeZW z9<~+KhHwU#ug!FRD{06;@44%ov!>t4ZQ4ZOubP$_iMGzf7!3K~`pMvUS&BjaBROe# z`7!+yko3qA3;Dcn_hwo#p2m-Rs*VMFT3>+qnWyxHwxHYk>t#W}roChpPkXzjRboOr)?;~V*Kdy>p@KM`Q znh%C4-d2%@BrI}>ArG4>SCHa3=g0)2aqc^r;k%(PqNe+cKZ?RT9aKbwv%A_A?ZWHD zUJ&*3-N$|~U{~hO%fzE{5l#?=>c zz3lMaWQDQ%HsvpG6Cg0uAxOOoAV(ojANKEvUMe*{efT5-&s6AD`qE>8QCkow9)g1L zfa>4c^HTN_cq#NH{*QN_d|hZi>_V7~TWCyv`|BYo!jt@%cbr^Tz)#*Qo?5Hb`+FLo zN0<%~8w+5K6jJ#HuB3@(^;7vO)cFSR;59^7P!f$`k7re&;qa{D^(LTdq2aYLJR=+! z33gEeZS3(rqtH_5XxNnYaflv%x%E^)6^L2J_F#ZpB_h)vzn{|*elVMNKi(RXW#Qx? 
zYh}k}wxx{^xTxen^7cpJ7q`)-LK`|3ui%V>F}|Ps+HYUCg6nZ$pbFpPJcVz5cq(O3TO;ziy*<%IJNfxIW#8kocDf$TUu;EH4Neluq`-X`IiHqJQI7JXN$tlw zRo`w2?-oFZ$D;FBAl^DLM*QA4LD|Uu;{OWzlfP=m#6-$uqx;(KYO)WU15}1mUR*yf zWNq&{XtrWgUr>L7OUQ`B@(m3$UIgt*IZF#GPbn=L8lWOGM1Ud9c%OiNch{!)?9R)4 z-u=pDO8!;!n5)^@^lZ$Fg;}nJe&VZ(K{OV#8d+L~ny*m$UbWewNK`@|@yNiz^~lZY zD{$u0%2W$aZ-=MFpRX}CmU7z3fpW(uRmB0i*CXYJ;>PX;xZV!1UVOj3+n>Gc9Jtf? zU*EyB7^>LT|I~mCxkA#}nlJO|#b_jR8M$LWJ1>zTNITnq{6a}Uf1{}`x}4=10y}KI zyHPv~M=&_%`>g{`Tmx$KAv~?fDpig(KLlo(N-9X7KRT;kQ>-q<-M*S?{cYIKd-f)J z9iYpR=89f9XDLPX_KCF%Zw&4OAEDV<&6g5w-H~G5>f_uKnWacTY48#yZ7WFs)~TBB z(!9nl|Cev+O9Dl8maNjeo|3mD4dDkM`&GnKW7vG74bXT{0803PXaCITnxfI59kN!4qf z$=%C$OVeX|Nkl|{8Kd%ou84-Bj~i6U3RviBPcH_f7Dhz*xI7+1jQTv6+Ip)7cx6!$ zFU+(yeyNbTzFu+9VmmK{BEK6Y8aVL)6f7-12I9RbOtZKBiIez}WYl_pqpT)Dg_!gIb=J0S^0|3!;IUMok+o`-c0hY`ne4Q`rLkRo?C+w zKJ$z5Mw7JwW|B%8(FXUS$r?Ae+vYQTZlqFRLeQn@&o_NtcsZtyw0q;((|tU0$n`oo z3=pvx7+Pq)9o^%^1?Lq|Ho*PAFSJjYng8z52Tmc%wVwE9bq2_mt&zGC?r7o7q`&~~ z_9~Azf3;cq?2MB6LrDYmNwjsz#zR*tJKp^rweuyGxAZ>A%j`vitYaNT@KMh@gBFSD zlTblmMbw;yj#3|SaYYKd!xuXY<}p0Z$L>Mk@pAB$gJwUu-@++J;m7>V9 z@D$PSOZnD{OcTwd(-&SXT|)Hhn^YsMq1OIUmW$su_P*7pwsMOnx4Rf0D~BhW?TeCU z>TD?|Fg9d`)Sp1FYo85jx#uozc<)9Bi-$VBzdxNqI^Sdu>&1f5-yVvLx0h;Ql(#*t z(1FN1tO-abtx9LNf_!)YYE}Kcw%!D0^?-qw5Rm`L!a^kniRF(85Oq{xq2oSwm`3u9 zvt}Ec0)^Tjl5J@m!h8JTt(Mt&3vK&8BbVfnVk)O}x>j*4*5e8l5iZl1oZS7n-79!> zu>0NZ%}d)0mZ^_2vm&Ya)y+4z>`jW2i%rOF`1oO+v{N~+oUIVD$XTLm z(;&`_g7ne@H(d3v-cRSEZhS>sBz(QwH;{sYJ;6kP?f(dyz)wGda3^J{Vp+I`&79ay z{u0n*`aPLTY}mb$KI=)|I~hecKsBX!bynJGTPoL>vk=PGxyEdrmWb1+*!+BVVdOPe zhmB&y#f&ZE7d*&*w{p3UASZA0x`+5lHxxW`TE`!*V-|H)KUz##XN zR}WsIm2iSqH$@&rThCjs9RPZZ4168C7^4-#1~umAWYMV}y7f|!d?qh<|eu&M9L6!6xog}k$dC7lsJX)l@=mxbplHqaFT6-<9*pZH5 z`A)nZ09eaA!N1Qu=QZQmFT(BmKID07;B%-{Gc(I*q-2p!?@9BoC)ukJ(k0sbvSjfl zof^Decf$d@L7WQg27Cq&vX}QDu`n#V?IwD^=ueZHeV8aoZ`bmL)P^0)Gl`2_r6-Y` z!$6+-_6_LA@-T8^-S@RFS@-4Pds@Lv>D$?X1k@k()ofSW0DakA>{JcLpEF|hMA;Pb zQUKkGM3ZqB&iBf-K}_hJ8mxzy=Y_*B$p*K5Z`UAovt&Aw;^v`@U#kd@STmz@4+8W# 
z6@^R1cuhECfvS-n$m|VkakoXC>2*exdZ%{Cgy@|2;7|5pd@`7LgY(DV5oW~%4WWHY zYm(dRN}GdBSVw_$oW@z4xv8)JEw@xuU;>Qwewu~UdI`5(E6ODT=Od@YcylGQA8AWD z!4J%KU;@9WwtoY$$lt*N4}$v(QvzjVzEZ{=#ty}%JZS_gOLVW1gcN~~b~Z4L<~(w2+N4iPdS1d0emKh;^uA`6t2JI#WVgig|da-kv##pXvqa-jb07>U&}wL zf6^T4rk<^0#y%ss$D|{kaYp;h(F0_GI^I&TuD#y@id2B@AHK!=nvW_gx_yFVh}iq( z-TPLT=$}L9mnEeaf8kF)gc0ZlhkzCXg?Y*)$qBWMQ-^{;sPn!RJ?Za=u{UloTPRcCJL-D1+)m4KY z+XOn*Mb&~}%&Z&<0oql-@Zp62750#@ZkFaz`x$4rf6+lGCmkv?kJ_MW;Pi!g;1b;NLLu~mL&}C{Qq<=K@IBSU-mT(X!Go0lh0jJXFw3&^C{WLaM z9W&f8JUlE&n_^JnSWePD;RZJ%s*3#dPnNCz_hdbft-GLl`rCf%7j!<^>EDlv;Yu06 z8zj*;Qq;nueNpaDNG0Xk4oX~~MizM$-AM1yzV{d$7gFz_jn0!zvg+2mO~|>tI@vC$ zf^1|kV~uFJ=_yACF720N`sySe^<0)UC<+_3ey|)?F_V44{F-x#{KCA&z(;U(Hn|ag_izP+kS4eBKV_Q2HSQ8KKxAH>OVGv!n z0w8vf!AZkXbOibH&5<@TYDEWn<>(X5viE1)&TT-+_ zB@efs)#4{VWKbev;O(m@J(RH&#fEINlms|8Nu5vVoMc%7BIe48gpLLy;7a&lel%gd z6hZKEbc`qXcE5c{zU_C3dvQAt21lRLddBjp&*nd+@>4EVFzy&k(RqFDPLxu1+!Tks_ySVx9$xaOSVSVyhF&QmHOd}5j65h};m zQo!WddUHBDcm3{9Q|S|_k0xPL-385YGVXF3^Ypz<#q|&8bg)|M`$OL9Y7u$c*PYIP zJGQ?4Yqi(A-RoxY3|GtY%Mt~4mHFT}@Iof}Q|Z@R-HMTIxWa8Acj&%|H{^bY!oGcH z89uic;%ckoLGQ&Z4oIZ;Ww<;C{k%EH?|z%UojM!*@$r$kQ+$_+ahgOgovWqhQB@P@ z7x~j9H9|}FNOLyB`$5=Tr}pgLnOE)kh7Ta!NU)iqoU81#P zm(yd|#SovqjUx;nlCtT%UsP?bTNpKpsIL`5@1C;qCYsOaJ`$ZHN^*2vN*MDp(&^m_ zvj436=C8La$IZ|gQBr4D0;y#Qv;W+SSE$QDzt(Qa{pP@~Zsq%Q{T;6K10#CKtz6wo ztb(3q@+X^KYGRQI3F%^zRH6q#e$&lcj3>MOidJTzCtE&5=N8$L=xBQOrhIa?4qk2z z)}L!cRI4+Li3}3!hyOD}%%;dT@;>*~PSsek{3B96mv_y~Z9S%nN^^WRDVc$MkM%#C zjA_;wnWJ8oSB%VufHH1audnYzA7Wt#`bc4CVc(9QwdY{xMYHdM%(NXd6R~4H}aA5$S`C z5!bE*+vZ`J%;Uq8z8qfrssU8OA_(pW8XXc5Bzg8EJq8|sYk13}E{t?OW8(M1rFFmu z#l6}VSbF&Y0w>513%j$+<`P)A5y^0~y&%)p^J} zQkgt88Y#Svl^-SOvmr%J9=i>aIyF}6#)r8~2>(8fU@)H4MBWGtHEWm@8-F++UpOmWEg0AsRhaCfPxmv>#2c_XKr@$U7Egiw6f zb$s2sCd|xLbM!fT@g^XOk*rUpHq8qsBUU`&t|6+fdg@W!8+9wDj z#ACPS>CPHy`w_?Be7EF&@6ra0+(mz^CdoguoF970{Bpq@e$Mnf3?Tzb@Wv*}q+or} zoDHesb!dOUWf7KXqMCpal!_5&u4(cn&CqqNHd_d#SK?Zmap|K#TGimxqn#Nay34&to4#G*9TwTRe*5P$2L)Q5+tzv0fl2*5p|J$j&wImLr$KA+GzI 
zcDhLAj*EwZFJ$)5d1qB$@YsmO#PTj+Rz`$J%*D5XzEPQda-^=e=jDW6PVV6Ga5AOW zejHqB0O>nlv{5v}m9#OUv8A8s^+fq!TUeNrwj&)(B|zy+bv)QTF=E~Rhlvtfi7g}Z zny+@leO9|;4%spVr~Xcm)eRVB0^(~j#mtx%eP&skC@yzAJ9VGK)tUm%`}kBT z$-OCCH=k`9rLR(?6OkRxfbe$k=XgO3&fkZ7ad?#i3YANp#Y(w+wea!;$csid?e$*oc05%4<9zL|cu2F!$C&UJ zPOT7<9CIA90IWv7Dm|ae4g=745Jr||^=7_!cAkO6&j|s(K3)j{&0WC4pg<)SC8WGA zk7K~WAsNO(HE zxlr`ER0T1NZIJ~%=q6&#;@$2BM6V7&RcpNh(vFIoqbY-9wAuz_gFCPw1IRE~3-4 zB{Jz|fN>V}%O7dPmlGYguwsG^%&my6=iSwrATj2O|)_5PF-(FJd16I}Et zGyD2a0h0{W9;J_9v1x@3%;J+wc!f2`A*ct7BTcTe_Cc|N1rmoT3G}hIAdLCicUYM% zcBXKtPKB!fi5tW+QKCyA$aSD@BYK7#I|{@>tJR|lqn2JfIrFveKM#nTi!TDLVj_}> zX#bAh**kuGDyd-;XIHr);jQ>uC@&AS?!5)7J%6_j($~!rxU+!W^LS{8Oy+X zzHI$=xC}tlO8k8yA5(;H*B*u%0hHDX{l^;?qN)_&^}oDQH{@x3ELp|w92ti!%V2;DIz zz4=fW>un_dgGqdiCKr7RV}}}i-GXS_Yy({BA-sGn37mXos(g`Zr_apQN0R<`zroY;xEsxHMa%= zng{0MWFX7Rhzd-;I-9Bg0zt2ItyiOYX5K(knocD7);= zG9oRIbK!y?D*BcOfuVh6*+6brHEn)SA1OeBGsrSYiw`^wnU8XbSadKXcY(<2K!>>8 zFS-P#jySfaxlzjj=(22hYCFUc)*FZt5*fHUm?-o+$-tWtskqUAff@!UuRA*V)Q!cL zEeMNKAQG1@_;!%;2<6CtJfw_!sj9NQFjCL^3?%aYpt;r8u#VOO!IAv7D6T;6BWl)9 zVrM1XPpyQ4!@ztM0qbNi=FqWb4`afIz=dK-aTOF-FiwPSHWiuFk$NlOuw`uYQXe>uL-lrAV z+{}~x9yr;UBDb4!p6CIdrf?K;k+G_1++pRtGc_Xllhf73cF%jG#!ZhXsz)N_nF0ad z)uw^>#oD=<1X*(z+vEs>DTosYfTQ$2DPN_XS}f^4*=kda6n-Sx7SbAO9vuci2@#a` zCESOARQFsjWcL@Tty&DXYdU0a$_N-a_VEl%?ayMW*H?u(`CU8Jxy z(kp%no#BB-`qBtHjtoVD-jDJ;k+g3s|_ zW;8*FxFmY3xj0yN%@i{ki(@>cmmFj$OjnP^PUjqRTZpY#86FmJ3}guCmg4g73y-<{ zLkpd(FR9`a?70s}9lT2wki%Ut>!3~UhbiU@lQMeYP&?i_Wj{=5bKn88-o`JE{2pc!X_1oL^qB&n z4uQZ8;e5q-$qb;xCcy`Y*$-zHZpTYLZI8!Y7e1xx(cPXRvuojdjI}jd5M`H-@)D}I zo0#B7X{5!YU1yUX%U>{SPetL{<#9S?9tP{Xb$3xNR}sFDI#v5Ss-$WU;Qe;^Jp#c8 zqwh8mHQX1)a3LP(7!3F!t5{{$e7MpWo>U2*(#?C8Eo2h`Ko}4_PWlA$JdWu^-9#q< z)jNOkoQ2p(-6C_f0}+Tn+&EMwrda^K=nyX^fEq`~Hv;!RYhr~>qv3F_q6if2^E`My z99_bQ%1lJ-seu=&DRFz=B~*kx+uFuN6=EreJqUuvt#ANdod*Px=J3BlF`|zjN_NGC zoA)Pwc>(#VgRt^3liJrii86*teWN`D+O$WTL~M0JtGzN%X0ms`t73)!qT*VihW7_y z6KA8IOg{MGak}@pf~ckD@b`RKq_8}Q4O{6}vkQLUe-(pCFIgl71R{me;D)ewUJIVDMri#~ztH)Ht=MfX 
z05L|jLAs{RyDGkR_hVYKFk{!CDE9H9Lv^r01#1L$*BFR1+RRF|31_quT8XFr$R%L!YWY zd!ljRCL0!KGsg|>@fDtofD@~1SvtYM>xyU5{(QNdZ`fStenV^TXzOw+M`Lc_olSaK z@SWT<|7zLBV(%BelnXMnyTRH&=7S~#ql8T9tdh-29u|;COnr9KkY0QSwSx6WsAKe} zU(}fcMkP{M8HOo2PgrwC4{}9zFeX9>!S@*My@-ucsBn$Y1Igv2SA+A{Gh^2p79#u{ zl3AhfMV4EpjMuCgaHUavwpi8-1?vXg?fLxsW@ctvzAYW!AeS|E;X8jD-0E+4FlhOJ z2UR?M-Zl!H_G_4n__W%|X#9FU=M_`3++$#?ku$=K<`VS>JiO&ORAOhm@~&m6UPzKk z8_9%>r=2m@>9?%xx*q0%8>`>2>T z(Sww2BZi%~pypcDqbtuA@3G&^NhKe^$R1h>?4*&aLweQ3!2Dv?(4RoZL<_#Hs{kX!@Eush-Z zH5f&>(D}+TLHD&-4O}oH#gc*z@&9K;BHP{o@qkZR+lJr2K}HVb7%KRPtr$hty#I`= zFT7k6(1#1mY#*rpdztVF(pG#}jg#H{&+LGgdj#%%h8a!~{NKxz1dyyF#l@-$FI3u3 zSX%YW)5b@|%LSOgEP;X`TKCvib(EddjlS`nGtAdCOx}AK@yOAxG+tu` zvz_%3uN+gDra|{<7N{_PL>nLpHoF{n+&{K~%+D%Jm}YZtvO*NJvioAW6ez%nOn4M$ zf>p411;*#@#?H|Iyt>kBjCWI&;>MLkKCfTDM(f-e=7zp}2%eLgQkKCanKex#{E_RC zP}%I4s9(Vg8O86R`gW38vB;NQ#opgQz6k>~e~U{Oem#bHM40r&DF@dGP5qFX}o)Bgtj|MxuMo{zO^_C>kvWKlA%k+|2a^TgI?$f!hg17P@g}|>nH2ChPnPg;UmJL@p3xWE@?}I%( z!`9p5r6)43$9?{13b&YHnPh%*G|?@(k9U!Fhz$8=V)BPv%e9FDIlbN8LrUIvw9)(0xXT*aS*0|SYFXX=V|7MNhPH^9wF zgZfRAwcdmSqMPX&>#_IWKb#avis;J#b4DHgky2Z&)5O6Z3>lR)o?&r%2dL1!_pc;< zs?t;i&;|8$zD2i4Y1DX`L79&4?5rO0;2LD^9E3-<(u#_jbvySaT^GF809J|8#4BN@ zmd-jh;f0z?fGO)&Y7j-P;u#F3--EU2JwBiTyTk1TBI!v$;qm6c&r-5I8wj^lKscig{t&Z6~;mHRYq;&Ee%gM zxj<)!uj^%?1cTFNRx4jFg$4+Cl}74h{L~4by?fBF2$oqE#`#z8J3R1>St@wtiQLlZ zlE^Y_KXi&{5bDYTd5$RnX)>3c0W*!x#mF;47h;ajrpzhT??%Z(GL|ZT8ZJFP+Z&7& zt^}F=%pC*Yf(0Z7D0!-wozHXfNA(1pvhwQb-AMOmCLhvS*P9bidR_az2FrJsn2s4m z{m%2B|b6aputpPeJT%9Y?exVJjwe$eoc9?fZ zboOK%zL3*E+4;J`>F)WDKnZCrLPYl=FlZsr5SA-s?U(91)H@lOmeP4w(9^SHjxO+Z z-}2zofEAZWVGg9t4{zADbb)~=OX5#jQCExiz#KxjbvYl|7}<87zhOPq+5~9QE>pcr zXR_)&!040tA%lp=o$W{ykwt*dFh9`>gfQxJpQUKsT(E|eI(=GO2>;s)K(R&;aSDV7 zWPZreF>ooFMOw!A7tjzdAhhvmJw2J3z79_vJxgTOXG+p{Nx>%PRYO0v$EPG2eX`;I z?-^=fWMl(=86f?XDV1UA?Wh8ue2#foM)5_3%22jARt@}MrpPNq$G10nV#NtoL<+3P z7)zJl8_BFdxbR1W2tV1{sqG>I-~HfBF`G(g8`izPs0Vl`wN_?LY#tUdlWj z1gggdd9L)r?vP|=ix=Gw-79r|xPCJeNEiAf{3dzak1zEGCUrZT67@^tXypc}vurwc 
zx6ZVvw}-V*LR@X2X+9Xi-Zvo{Thn# z|1Lp#0S+*pPgj;}|7Y_5pUvw=31=_EM#5cB}zl< z>B6J4>Cb6oJfwrQhE;u#cB9O|8WdkZ|FeAXR4g5@!^Sqm!*5-t&0uAbF){1sd9IFQhX zE_qft58h_bNOF5D3c-2U^udm5 ztLJi18c7;t>h~yOAH%K|`(2>XAhj=qq zs`~ZE`Kps(jsKCFNwySb7oWMR4;wMsg%leNy8a;?dNhOeg>c+)M?h6t(-WH|RM?PF z*{xTxC(w3X6=P=lB%JwEAm)9{YiLh!kY5)KoeEa{ICuL59<>@tM8MDdNlhr}uTmR( zW!rec4h`Bx0t%z~;|F_|B3q%CxVynG;B7GkdT3|-suY)uTT!+x#&}wSY3wX=5}%rf z`QP&5v5Lx=l1Zug39JYvOcOj1(id+-%^*+}{v65?AA8TCsANf+(zxz2ae0XB(4V3*Ok?dA z460U^>|H-Q<*A|LuQ1CybS|m9mcKe_w{cDNa=N2D&k-&Zf7_4N6YDv$NCzF@DMS-a z>130AmpV!QQA(Ne3)OgiyrV^!hA!syA`i=Sh)4^Cr~k%Os_gF{UPZZ8G{R?=h4pOD zvFtv5)c-50#^2ttAqRi`7}o9gCn{ zqsxrFAyzGoRmdT3w~O%r28o`$yy zMvAl-Nqu-$?-`G9N>I2#U5${sSFSW5s^tl^Nb*lDH^P$pJs-ayq()0<7tSR7sDt@P zLZ5KHlRw_&tr&?xe_gLwS4o=E;k0KkDQ`i`PX?jf6xvQM&ZaQpU9AHd7Hat|zE9t( zAIHi2%X2d#RLm0iGIH*-GA)m{5Xz7!m1~_8qU^oI=iiJl*_&mWsWX>#!LgHHa}AKu z(`o!us;WyeR%Sh4BSLt{mDRu}wnA}}j{#!T=g=S_HFX%KQfyxSPCSolOT2B1K8^DW zD_IK6?%zBY92RXY9QBIkoSc|SW}iOh7UkzGuZ2E}pcSbOm(E`e=QIV$D9nj5NAvE< zgSe-yHjZCYta}b5Z=^`Z!&c*>1QX>ybID44Xk1Vnj6z-``?;~U+U*m`rKk^Uqmi`w1 zxK53a!A}VnuUipsJyqOsZH0X$&->;>tZp=uJLAdPB0Mt2<@Xneu#T!SJJtQ5BE|4X zS?YeRj({Ysj~hv9-@g8c950kQs&rrIz5At@Pz|b6#tMVW{K&knV@B#ByPLm6M(m=4 zQG%wt5i;gmw9^a*;TJMQtP8Zx)?=~ce6BLZtp2OPs2*KRw(*cjD$2St0~%29L7DKj zFA_@ZaNaXg>~%B5B;l*W!}NvVAievnGQjyEyH+XsvW&d97|Z=Ar*+5365Se|mJBv! 
zjZ!@p)qI5nMWGutI=NH{TPvMfOL}lRWFs~Sx7Kvc9)o`nc0z6GXDO1Oy=w(w#Y`C0 z1X9Ot^^6apO6dS$6!Amdhc+b|m~;481wNXZ#H@tQP=vo)a(*qSs4mF(h{+JL->oTk z2z^zLFoyi-ouLrt>C3V9xsBt_n7CUX#;9fE%Qm=Mj;;4zol=*ZW- zMX>A31jgH_jVHFRU*qW6Mvb%ne*l<3XTSZnSXdzspM7;KBZJ$mQ7*2zDjY-0#!-Mj z`rnCb9{%_;2WPt66utS(gGIw^yMMHFD_z4kf}m?k$3KRoHf8-t%sKorHipCmseE`E z`{D4SapRj@ymAL*L>lNk=AYj5o_4!1giYhfogdwD&51?AzUJ3YDhe?XaLK_RV%o7H z`t22G#@$|a$DEKV@uLD!4jD8c%Ie-L>f4X`Ceg*$+^b|ocMLO5fGRD8 zCt2*oTM{q=EFi5T&Yer6n=U*7VKj>N89YeCdidElqb+#54c?emRy-(|9ePLE%EZLM zBP$uKYG{agK}P!j`m?`loP!Z}R#vyDd)K3)#jEoa8>L%%;X#pciNBJ53#|C;gsiy` zwxA#vl|WV{v!k4?tE1P4KdW+D<$b__0Z}jfj%$bUoJHQ5o9_6%4;VNQ@3e16-TdXL z=cA3=n3o6{5$+COb?B!!0o%{H#~*(hLNt@{Ur&(-pB^cht)Z4J zK%akV9qHL4O9h;k!AREg(ehBoj&v2<5;D9a-JJ!Lw zY4r*j`Qm#BAyw9H+N5C(9x@PN1ch&y@u417I(48SFcb|bSnc+#Jfkv%eg}F$7ebVy zKG&^XBa4?TgA}b{8^$~oh7PqhW>c^?#JU@HA04_1(0xU-5#1~_Ir(7{pFU`k*yVvP z40LTZ%<3w4KX|+R?B;v*^|Q~sBtss(Q<`REVEhJ$YsNbr^nHb#AY(eB?_i$MN?_qq`9o$xJjl=nW7>M-SNJ$Eln84H`(hw(Vrgww-d-_mv`wQvT0l-OrIsCYzLN9wR7x&4Nx=otM?N^^A*;_Vf ze>3s?cG@)A61v>shBq|qOJ`2;E@tqK^p$~**d6Ou$%HT8)EC)1vn2yl)N;^w-L6AN znLF`QnK^5z%Ex|w5#(t+G?e7%Ki4UFYKxQ3LP(S8v*yVxJa+1mB@5>-5b1lebm-Vl zi9emwJt9s$qro_`;7+RQW$Y;b$_pB7f11NM?;+?rGz(B}? 
zFxJ7WBFv`HcoAl>BkgBMKCwt^D94_7yaw5=S3i0EFL%k86BjD>oc&T6hT|bP05pZO z4e+ULUAtVay6Q?L{T|&qOP%^nWX{~V!b;Qq4?iU{=B2WZTxQGH1?gsfF*SYmdJ2>{EY|tFO6QmTkb;9zcQS zY-J?3TE^+bUoEq(1_{mIDh^(_z)Jz=ieuC-%M2FXDIDdD(cp{lgMV@4B09R|R11(#>-1?|&{mJ7VA+M_i{j1gU_0>NS^x4nIuYq+~FMM4lTt zM%J$k^8&I#NW!W@E(7F{GX~DgQnY@w44Z|#f8!tFZS~ah$d9HD%!7YfA z&J_XN1nx$tu}VRR1!q1-JJfG39-*MdiD{!fq|$it0@DpO49HkeY(#%5rbOnaa42*m6g_mR_ zl39=%rfsSKBt`ra(;Qs&m%twx(i!*?mid@o8GFayBHe}>1l=PY`rE`{$5=S5M)FTV z<^@9OiV&!9Xr+V}*_-4K4?ZX(-uy_qckPV)PnE@s7RX7boiCH7&X!wlxJ-IwwUI`3 zF(DUl>2DS%C2R$DevZ z=ZgezAhqUaf|f+l3eOPT4G?hR_s^j65zt>_D5UL&$pTIb7?yDOS${I-zba>=`e6#y z;m7_zx$yK8gI#mzr(wSOa)K^T6aT6aSO_nEqwZps;l75S!{~abpq0ExtSrE@W!j8c z^6Wn*$RMc4Y5w0yCrCY9NqPk+aD(Qgj`Zo-RVVt*Ub6uOZFeb{`K~Nowvb*FX3n+< zc#`a_<&S&JC+Dbokqxq7W{jlkrpwM9YvqNf{~|p)ca)`z<}ny~^^JeX7S`D?!$7b; z0}VsdtdBjQ4;L1&!3+c0v{c57dW&xm9yJ6aefso~2E2h=YUXbTeD^5k_GbPv#*vXpLtdW_QUrC88C(i zGEk7va!*C`JvSafq$?TFT58U>1`Fi4F z6_x4H1fhS0w;_S7m4qj?@>9HsbSNBw%0YmI3J&(|VgeQ(P@w7*B=q5P-N#A>Nt zYWzuCnke(qP!Q%Y-D!Bp+bHk68C36+CQOz?e|(vA1F5rUBh^hwvuZJ1jFfbG?srgKz{N4~w#D%qR)J`vuP0Q!Y%~xH8@I=;a*n)gp zDH)v(mk0j%m~?2G5xysUmo^2rq=k8yUR?l+6{5U+sne>H{Ne7~r31oTv1*-8GiCU1 zy!MK0L?ZKmM**g!^4qmE^F5z58Q~%6&wJxbO?nU=fbWlh{y_PQ)WSU!tSHh@@Uo>A z22jnKK2zQszgT+r?Mr2&+;r9D(j3jZhB*E4SLeuvWecQN_bi$A+3PZS+9GY2s9F+k zS$1i7D8Sf4(pFM3l>mw@fPng)*b#vB^XJIp4?ZW&I&`Cr3_JEnX^F9WY#m}Cw%P@6 zC>|O$!QA+RgB=HI!F=@De`UKmBhf=8Oi&fuP@DwajXISbYgWlWU;S9>wa$_?V4^F3 zeulKm>Mlcv_EiUs8aCAHSEzslW0V@&%Nj7g4YmNeN*;4_$~5-wgLu%QWL4y_pPnt5 zZMet_p7M4`&h{;`b>}u*cL@H2Y8zfDas)p{3TzW{aUqf)Tb#P;Uq1bBkj)rCwi$qJ zL`5Uatazm%-6>dsF=yLW$<8j2oE~Hd+)2_rTb?FOf<2*C5rI%# zu+*TIlf^OHI4Qaq&juMe@;w#W7KJ%%*b%ChmfFqbfJ2VdcjjoHtS z;!{`k!%BOD4m^w=C8Jh`_H2Lt;a9S3FV*x5JTsd zs)(09Lx&O$3@#UN!Ho0>2j-K~`g6z;Tv#Bj%t0Z){lSC+=4wKlb>erB1y%m{sr;z81Je z99*nPKZ{MP<$>3dxL7m~DSQ!wrPS%SPf3oI$6-l=hq|iI6 zM4=$g$I4vz*`{X5rbXXE&Hp@pxQ%4y^l5V7G3QF}ULBOExcqDXegkCx)*{oUu0nAj z^2jT1$jUWr>3|8S4viTtGkA*M*qfStWtpulWu#kae(PGa#6HqQZ&@}!3ARMhcAYDkC 
zejbP-4IR>~WUgH>QBMBpW$I@#yEE3TkSnhKg*0mfm3Tf>*-BQFiASOd+6l}ikAGf8 zl9A}NwEF}Prgceln(!Ldc*1WZg3Ro>siUQMguLk^CmcFJX3U(cUjGZ?U(Na%-y@$$ z`W)C-ziA7(`TA>Q;oLbgZ}tpz`;*JAm+32FVL@%r6&~}r(j&(ZAjjAb&wU8yPZ%ec zUH^O7)soLX`vmDlSI~u5NNqUF!-ta^Q7e=yb7#zzx5h@&yhRgqAK|wMa}q}CH^E1k z!qxlWlW-DKw)X8%hw)QLbuC`l_k_f9k|)2(My)dC%%cvGVHkXR?6JqduC{Dlw@kkS z&gx)SMDWeL;$0XJSv=?PA}I|~bmItA8Uo?xh;qp!&>t+-di*r=W$HKI$fV^W1N*mE z7bhHhoOEh}H<(9VfIh}X?K{b77hiyPE2L}hUXnNIEtxiDs*+0q2MMA2A3pL}NuZ@c zGX4CCFwz^E@3MXM8oBqWcco3It{~cta^`hsOH*juwP;jgV_xtHsNq8Nnzm{qr(Ag+ z>@XFt7RFD&Or17G`;b#G*pBT&MO3lHXbohh*5fuTm5)dM4R33s(y&N|9(}&_?buS? zrZsOX2OWAW-sS1QyrrMNCW|m1p5d_ar$Gg&XcN)^w<{LRm3Kb=Qo3e!#gwr5a>U8! zNmlz7>aPIbOg;wPb?Vbk-g)kB_!Ak>KeU3M`17B1q&sCrn->Cy)-qQ5Q5&kJpMK_9 zdXWJG2Fm6w>*e96UXh`LI)&w7IpC2^$pp#NagZ2lYig(paiC@)=&kfd6&Lhh>Ol_- zix&n|#L$v4hiiBU2+?KOH4zH-=AC5NVF&5!u34RB_*3`F!WC@8BUmMJa3|lQue{H} zhso3TTraCuuSI(Mi!7TY!(aMm_}$Pgo#D{mCDiQc+ze~;LV5I&hZutN>(^7SFJtgt zkIdQvtv`OPh)L72UA~?)UEepVk3RQ;P(eFz=m7mKmeiN8j!QVDBgs6Nx5QIx3mA`4 zzflKy;+2==wbx&k7hV`4^H;2uV}>1~A*52f!CIZdMVyFS5%fxkDY>kQe__d$dQWYY zK!88Zgx@HBqG@41`S!K))?06@jN7(ysFI++`KZuxWg$aP>GkD^qfgLxrp$Zi?RSMk z#lpa+gh77lil6-v6)yQPv zJT8i!T2BTH>Z5OM6#Z*&|3}v8`ZBS6;9JYc7`&^NE|=H-vrc;S>aKo|JLwc*??ML2 zA+4SqaAcT&t}k&KKOqbn3@Uxv#)>C~$Jzi|lJjRT;tQ!)7ed1f9lW1(Y#!qqwu0#l z+JTc!KT9q??MGMxeu`Eso`3uv`RLP02$AY)j16E55@DutBqNeA`LmDZ_Wyf9diCgv zZimTo*s*8H*=PS$Nt00*u$9WNHIeYE_k5-)kR8wnu*I*$iq{ODdJi4&M<{tDv}}vt zb0oXcGHOgZQvy#G1>f+XiL1QLLS3;!fkdDseDHz$sk{0OprV0UxL~1P3qa`{i=P1svguN*2}RpyiMG6Y}v#@5-M?d?fw) z50q)srpg63JSfK;JwRp42%ifCMeBa&dYQLip2nMsJXy1RwH$Kl4brArUDYgMy*<2( zCqUhmdZJq#xj;5-zYKrme+;~A-m0^-M5VM%o95EFO#^As2yt^9ihe6G*%?pxFKdjY zic*ta#C};dDM_A|YJ~vH037@fwU9w=J)aBAz;fxlsq*mfPo)DFdYQje1{{5%WcF-B z=V%46SVgTKMM>|TJ>|IGBGYFq5tJ?R!k_;sa~AKwdloL}pG84Iqv7iv?UTc#fMhwe zONkuSp>MtoEis*&4hUMe?x2OSmX$oIW*}^T%+A^jtuW+C|NRbBz}Bx>hjkNL%dD}( zu>cEN&M;x>AMcnS)UxnCMWo7>dw_AQ~( zc=?Uj%iJ{xETyh~3wfO5m!2m(Hq4jb-F}C<%?m@=<@bNMUs^Xr1p&Pex(c1LjXJ@p 
z@cW@giKZ@e%bdo>(E$0_MwWxu!jUWa=wwH|Ku582W<<#c$V+8*ebOzCfoJrb||4Cpwp%*&%Pf`0p641k+Ets^K}^5wjceGbE#9ANkW?URL*Y z>)?k2^T=b5$=dDwJz`b_+x(2UR=R9mI9q;p#&x)FEUU4(4$XDfT`nz9|4K)GW*}YZ z&i_VlmSO$s(-qB-QCeX#KpZje6Cv|u+y2%K+sVc0c;GJ#)^&aQw!%w8U0 zy#-v+4Z-Py+B3xp6!L`MJ5hpl%!DwQ95?*+xp4$46#@Q=K^FYILJki0(FP$3opD%M zj;2YorWm2TL@qx6bZLt5TK}?fx|Hhf)vD|#IL{nB>MSN<%#K4RLM*JGMTk_o?^OX!%DrjK!2|lj#P(oyIIcQ zCWUh!cvYL?zFCV-a?wc#%L^kvmnO~8*8(#U>n04%I#~V35e3L;B-H%%GJ5O;b+=&Z zWNEYSFs!oIPM>v(7Vg@%ZY#gJ`b@d|iPxoT+cX)ERRrhHUnebkG)I=P#TmW}{V;Ia zv}v9E{cmq@9UUy-vIrs6lAgW0YM4456oith^ARy&4e9PWXnz^;#GP`^)xU!}dk|W- zV`TWNZ_4f0o{sKfq!cw2tSo@3jl<>b&*#hFfdggYCA9!+^>9l9tP5ZTK~Vv#55Ct;9zHD~;?}e@F$%WngT=e?A$b zez#%7d@ylzW6!+so5mThm~X-4V=3GQa$pycFJ`S$tI6Mtlg)XTNIj+l3-m}TcJIR* z{@(I2M0UR){Yd_J+ePxr`(BU%{RYa&5f8|}e{!N+aN1$e%m4v+6U^;@?%5Y*32yuM z>?~8EMS1$4V`Ok97J9-g2vi|7uAoehWGl3DZ;qTIDOgTBKhy%HP209yxmP%bk^!=1cfwym8T{q*QsETf{V&aL(O@ucOt;Y!%;3y!c6 zNS= zkB-@=CGgQ@_@e!Wof6I4$PI8nUkVhrIss6)5H4;uqq`y&V3U=CRnZ$y->um8>C;`W z*WU7Qv<+@_8yooNMl;+kjW%27kBSC&METpKjk>++SAWnQ6m>ThY!bxX+qi4rKc2o_ z-wz(pN3XAp{G2Zk4mab$1}|`zL-8Taw9(I~n=ak5^m^{qcSS4d3?{Dtd8`Sc%Y1BO zLqYIz&9~8+M}-Y+x_9lO{zgv;-w}5pTsY%%wQJcvgt^}-=SK6^!z<$D4u&cw?_+=A zgNmYWJ_(!jbZFlWZS7R>k+O=0#c%e99%*$NM(sOw({OHo;0?t&>@pbgN7JlgQbO2q z_a3A?ciYV9hD%RZfAzt{-3J~LO=G4|vQ)<*@ zQ1tEc?VA7GTxaHg-_-DW_6m7)@ufFJn__!naTA^pI<{F-u%c-F%+cz;eTVi@X1A_- z|IElqKs)w*s4zUmJm9W0Ll=dv!V~i%{<-Q)=5?^M^aVeB;=AxZ_{O6fDoors0+omW zE8ww8I9vOn^^&d2kG}e!9O>D!m&{+ZN{&AJ8tL1&ElVY+3UOxTo)KI2nuz__ z0T0t}(1GZIhDqV5Z)z53(qza!`|921ZM4Vgi(h}E3$di7Axvg4jb>T4OrP?tKH4(9 z!wxxET0xUs06i}UuLTp7ywqCgzVI!&$nSo8tG-If2oT*lq)Qg; zGP_Dvm#)&KOP5_9S$N+)v#m75GQ|10XvM}N<8Oj77l|DYh0n|n9#}M~UL)=Irq@=T zTFact|CLp1VqG%CgfG+pCukV*K>x8N{*A{*=Ju?w>#I(mwWHN zSAA^R5;hI`N{;AthN@{*mMMwJF^KM*0--)TbAM4q?N$dC5zC7*mX6T7H&m&s!=5`XEV(z{t3Emo;Yji7erS^*Qe z!XgX_jAp=)!GXV7TC)p5ZBs#v{pCjk8pT6UBisfXHP=kouLaVt@AMLZ6zu@ zd&{KO3Vq`Yk9iT&$=E`SsFlDj*s6LPx13s;lG;#OwQQkc)N9ZT*Hu__aSdE{(%)Hv z@|z(u7??V8_)#ayZC9Nt_xE9etzI%oUj7g}G7cIn6DLiSm;X5$ 
zdEZ)g;%AGlf9;lweL@J0;8(|eP7E8(Z!)M_he&!oCh8A0z4y+Psjz0EKEE7vJM2N*H$v<{W_o$fFYtvDV z`_bX*&^FkaIb#kc`GMeB;NqWEs~l{g^2Td_hixk6l+TeKhn|k@R2v5DL1L;;VA!a% zrPOO8XPtQ-AEZU=jyO-o)V~$_Y{buLgzML>z#0u>rG1A^GJo-U>3-Dt(ybRX7aVO5 z@z~E!69zF-UPORI>AG`i%`n&ed>NL75$w}uNd5ZF<(#ulm0NEMEm@NOzyp3N7o2+@ zmKI_lNKr@*pLS_TH2eUFYq{Tf-bZiB2{w@%itU9ZPF+?RIH z-3IJ9xoPMaJe5v0?uD|vgeYb4II=xqNtU9 zdVvvS8@9rwi9KR*mF5biZ859%R<^P-#)BRJdI6`4rPh_h4;f4wj1vp$ z%ZV5qbji@yFPSg@`eLbM_2^Cux$L~t1+%T#8-P|hr;3H5m;>0c{hA!Gf0%++t5#44 ze>whhl`*tN8A7%!SumS7(zpSKhC>5%;&IZDEUjzHWBFIyGSnbG0%k_zfzP?TTN zn>>QO>PZf*Th^By8?)t`@#Cd^$Ih@5`NKmGNC);xKsC*VYFG?d6Qu7;J*kjzT~Jpm zQq3FJlcp`3NplREY|*TVG;h{Snl^8$M>E`)cFs~|Toe}rT`VE_~lU_&pa?)HZ zfjwIq*9o&CZi0%Rvt~F(F$3vNLj;<9Mu#KhDh62U?=h^($OU?6wqr|Oz0;O?u+ta2 zJyGhAkfL{NEEL)o3c>(`OLHGw(b{b2Cb^A$S;>upr@qn!>!WAiYQS{k&H6OKDk{pRPA z>ani;hwo$DHj4rhPRzs#4127}SEktt+zng8v^h(D!}_sSINTH#1aKWItm)52LPyLS z=v;dA>4qx`@>oCa_>Vr2RqHTi4R!%>Oso7lsO0Ns&{m{Jw_e(kws^G-7O#P!3!zIF z=?y}ey=;TJpEctv!O#H}O1pzG4Q=cvqx7APu0L$QA*un&V@*&sMtDuWS65gw9ydIg zt_)1A6OjMF!;hC|9=t*3FI+5Ho!erXXeU{=WRd*lftTgrgF0#&w1vnGdvrK^2u*`5 z^%TCURA`*|2~up1b)XJ~=?w>}I=I=@fq`f6i2%47AxGacoDj6tVf2@xFZ4v&O6rVA zlSUXLKof`n&P!n*lV&;YCJ{a9|k@=TdCZ;5p1*hV^c?kVdx<;!EQ zekKEZp$h>OBW`45I|ic0fN-h4hwS>@)RH>&F|(jE_BmLwDOA+)cGJ4m>Pjn-ifs`Q z_V59zQIL@?bZ=liB3)#OTLBYeo!19@rkTdBiFe^m@}5JJDS6ZHDX(M%xF89Ct9fW8 zaK_QvMGNHr9{UGWx*cS}yg4e}JFh=0lRte!(sQ?=Yysip(5Yr9K(sK^X;S%83uLl# z*&_9`W=)uD{O6~hl1nZpmk9HWBg{CqZLk8^v230m{-%U`d3bel&z2GaBVef5E1`qKw72?#!q;r3Z z?J2T(b7c*wqP9XypWM6jddCWEl39U{1w`4FWZ2UJ!N$S5d^bPPdx< z@vtdtz$9*!q&H6dNt*FhCfWQ&)#+9F`9bG!mnk*Wc0EaKQ&K_QhyzyFX_wm^w;q6zdA8s8`^W0)At7({`H zfE~)s)Ozi(5YIs(ApUxFgGzeV^jUHYh`$l?Z|mxn^3o$CB(-G+S+ZuSoO1p_$V0X! 
zvoUa!+of7)$aNIp9K4xUrzsYA8KT7BpfLxeEtdK7mdQ{MKYc7%uuw*fnu#?U`pA;G z)8(?OPL>Ys8)#l~E;xsZQC5=|v*8P$(R$WOBHVoX!Mk$9-OowauAOAwyu}JcCyc^> z`jJ~@zx{{Eg=Zb5peZk-b z=c&iSPOQK(25JpbF_vX(Y=(rr=YUxTiTR0IXZVgB=y&Im!wmb?zrQ0-z4E^F=$<9B z=gik=vpRQ>`))W_4jIy2PB?5|C0EtvAAK-NHba9-{QwI_6bE}8X7%-eVvotiQKxno>=h??*l8iG zEl9XyOA3aB6OQl{Y!HMJPJJ2JyRW)i8(VR2^rxT8#w{#fb7lFmc^ICD@fmGN;37v3 zJ0NT#!$pKpm7^$h8BP?7?X@~`;C@&y9%d)Ib701d`9{YMuz!ES;y^&nxCbjt?}3A) z4)`wxl|!!M&aR>YvQA(WxIjNMW8yn#8B#L}uv_i7a^;UN!n+JvGHdqP0^=9`*4@ohSC&j{CpRlVs^#76@ONYV=-BT zp=K6y2}=g0!G-+d3%0jI1p$>ic7Lc{h%*EEu0dIN`nK@IC4xDgV=kutoqf&C(xp>7 zCH;dB?1$-={V<(&sXD&unp|$;n3_ET4=XqoJzq zfr~B`s}L5?&yr(F4%@`9H91o3sjM&r^f&sQ*oxY6x)%HBYhxtvg4q+~g}45LInSMC z{(?o)vP}m$@wnrqE=u{tHk01NUua>V%)z}8RVu3b*_+nN^TS`4d`$al-M&3q8#l^L z*Igk4x&(DUTPZ9ub=`9la=Bv<(VbFx4(CmxY z>O<@e;^t#Kd@JbHGNz?qaClZ;P`8=_+V0XYWGrri);0#l)oG}1O$AKMXc(qe22>`s zD&YjzbcFuzp1pd=k(lm1YVv&P*s+~_{EvrZ-2-<>C#(rDZtREZHy_h_X7=UTWpNNLC_A+ zC`=54eMC$r(}6o(a2Q0SJsTM);__BtE7?b64&L?e*8>|2eJd}%^sdy!l+eQuJ6>9} z=^|^^PL@CY=>>V{!Cy*awvf>ZW>{256BeSa&ssTRe0-t_Z~4X8w7X!3S1~8yV(!(` zE>3i3ZY@J%SIvC9v)4wSv1iaQOtx&^8n6%BD`Pp~+I7{7&F)}m!3*vHlFstjx>_E2 z@J)Tgils2@ghz3 z+b0YH-HbJu8-(ox<^A_YL2K}Hsn0G7{_yEsJ@TeBLwEd!aiKF69^DaJHpmD6{Z@K* z?<(mTwb9))T{?8>se#am_VZ}4>Ot*80O2<2&%TI}BTya)aFr}uMJuem1skpurc1=} zZ7{mFmOgHd{(SG1{FO%CvNH90-lexjJ9U*Wb6v*k=K0Zu!@}xvojQk=!ybNhRJ0Y| z^K%&)dUtW?Qe*Mu>2hGh2OM~%BJl(P^Ykw!2p0i^9sTRtV|0r{(;9Oh!U!vq#n%bM zJKE44)StM)K?_|P53B3NRu|K_3$VsnE>_FS#Y${&!qxF~ku^MUbu`*>MYWie`c&G9 ztG#7wx^kmAAHA-)_U{{3V7vQ?x71r6Ld3djr3LX_`@BWbpYOUz!|K_yhhASAH8aYC z%$Fa>lDv-E9r4p!qxlq`CU1wvKzVe9H^T~v!J2|_usnMDp)9iG`y(b8{IB5i2O*4C0^uQfgsK2ZZZKL|SPE^lmf-Z{) zciHeuoOtlJpeuG3BE~x&{QVHse6A1(3tgxaF^+!!#dz4wn>5w?|IOMEaA8>Z1|Xn@ zLY#Pn5DVi_t0X;zs$J4oe;k1FK!6o)t#tEjt(EwFh>x zN1yl;T_glW8%louw!drQwz$D>7=6YVl-01YoO;0(>bL=Q!Z0KM^^RtMv zehOOY9Ch8KX;ARx=sY?M*jm0kj6%m-N?R5Ket`guWdDPYROe|__rlDc{DsV%w^aWA z0b1|qH>WuOYcI6q>^lyk(*)2KGE;0lpq6I(RlcUuuWxTUK=@(G-hw$(WHvTunh&ck ztwYzrxu+j5jq5QT3=zVv==Hj5 
zuaG(nBE5lJd+iNsH+{)!X%2CUhRv^Xd7r0(&W(ddaV``BF!jGI^3T82l5t;G8osToJN2 zXd}P)?QQBiwqT>&dD|_r0AnNzxEdcm`V7i}2=erk2YpoXY;}cI;e_W)$e#_W(tz$5$OSs26&OI}%hJgL-G=d_(x9}6Rh*68j zZLUoH>T{XCL?p9wdz4*EVb@s(@4GJw1rz|EQ+xfwV%A@kVd{;vvRS#{?17G z_v_O?G2?@~lBO@;tiG1bnIQieJzDtGNVixJ*=I<9j&)aZU|$2%9w#%}L!BOBM1Gp4rH%~f z+fxRC^QSLa!?!Z=tI_i6oB!5(w6yg)a9}ST&5x-aF+L=?1P>QZ6TqRgOY8h~Q0Whs z$*V;M4(un(R<4yN-WV%=GchQUbMFdjOViA5@)72)=WkvwZ98%H!T;pRKRqnl_#JRV z8$d>CN=LqMP$;ZnPz7Ly!5Hxe0n{pXq5w3!+Pzxr;-o?&DIx4ahY7_r)5%!%8{n;4 z#C)Vn&bG}m``a);cFD?3GU$Mlq+N%4F}acNNI)7k4MJNQ>UxB`b-n!G1NUiEO&f=< zpcfvxPr9|hoQb?0h$&rq4L(%<`p27O!Tk9$04o3SpS~t9zVZ+Kb|gX$<43|ukxGL6 zAr*ReAFDkEJ~Z@z$vqrY>H1@x{`ZKNGmqL5;*EHr7ECr>DZu z3+|6KmbWZK~z6Wf9bt?_tfhR zzkfVh3pZ3IVm?0{tKODRyD$h4d!VXXZ~MaNm*?!OVRh>s)VTLP`DU~kzIAgF=$jcm z)tOZCGbU=_3k~f;oV?4Yd*1NCDLAD{5o+pus;Ud+GoMt#YAiv|82pArGY~bZhNOph z+zcl-dhW5`tL!3K(Kj1|+ahNRvxsF04{#UKwKFPMJ3cz^#QkI8N_6Onr$-B*iq|BS zIXrHE10UJTqhI~(M;cbUCXJ%{En7v++qL25uu-F?Enwd?nz@n+au}$X17h%p{}2^p zWnMuY8+uoe84IIdT{KL?Y|^kER0l1idi82WO=>G2L~VNxi@se&^(}%yeuQwCK;#|l z&@zm2kpDaNMP4-NuC>oL=a3>) zfUh6@7F3sq?v>5ayMKRD-Sv-kgIsyX6VWD$%&?JfIYGA`ZC^PhI;2O)hpt_-)X&Eg zm%|YZ`$9A?nV;PBDJRO-pF>`>dfqtwCM2zX)VgKU2wOU<-^c$xD%yfL{y%%y0T@?t zv`42?@4Z{avW+X9&}|?PT0#<%kV4>x;6Oq;2_z5_LVyHFs3G(kI)qMu&FwV}bd!@N)L1L^jjafzTP+*cu8|E*4N_BE zE9>wLf-V>ZoQa-IGSV-WQDX2dRNY6NT_IDaTT93|(0cCE4@=EPOB*$9j0DGx^YL5m zc!^Jy0}r05p>c@`(uoUNHrH>IjJy)rvbjmV_T%43PA0F`1Fk^U&Igqs8fFPXs|Z9; zEI$8A(@H{?9ChqBG(I~IpZ;y#q_^k#lXEoe3tu=~%Jbv&+v^7HL9(fN#^DWqLf8j5 z37~gp!z%gt4^P&7JpEeI#D0-f!QjTjnH!PDI zuD(+CJNjfvNK8Tf(xh(vY80L(S6p&|6k*~YgQYlr*8w2lX!(h9`eI|ELO-)!^0?Gyw_jT@)K~m&IXuyGiH2EX6!me1+dCMO1A;Mcm#Bj)EW#E zWX7HcX~JII>QP!;Bv5)`fW{xfkodkKc2PocW_ub*Fl1X$3A3 zs>1DVWpdUpek2b({<1VhkwgO9*K3dgU; z;Q`~P#UDbCZWYYpg;kYO0n_K>&%W#^kzQH0WRVmDMt(t+oOkh6k_~C-!v}Sa*{w=S z@^Fh9F1M*DDUkIITXb22_;1qzUlwkNXH1q%omi<^QnCv4S+}9d5tI&c8an(BmoehB zQDSBKUi)i8(nR$rcF-rKQq2f<%O50|sYBr7W;abwQDgD7rfmADEFjG6=Ox$*vB%~uQE-2XqY%%Kt9d$&4uUal& 
zr3l1A$yAMeudZ<1^Ob~BSmDK2Gd@9dZBAG%YfmZX8Fu>^y2 zlGu7?~i|9ScjX>4ee4Gj&FgD)ZSn+t#a)9+>4gfjWV?|v`$J@lx&`r15MxoWjE zH8o4ky0!A|TW`t}kNy*<4gQ4FIlIUYes+-*ga3<*OE5TJN6h4(FTW%EPbtMdgFfxY z8pt(2XHN+!s8o#nXp+zTk7xAoDDcTpKy8o%K&u5$gJFDgzvJDy#+-KiOiCp)q1u|{ ze|y!vhSqHwHPvtr^DXSS17HpJV#}cM5C+}$TJxLPM=R{fjtTedi>@`B5lic2D8Mb} z9j0#8yXKgK_SAI5gMSCinq~IcXZHbN*g#~K*0ibN7FJL-MfN(D!9wX`{o*~Xp+pON^j9r>E}Zt}Ctuec#AI{K;fI)Nn2;f>G~M)l_csR~ ze1ti0&;89|$DV9f*4eG{Y7!;LksJvE`>9%EfmQ8$PbkcZQ+L(prFXuhrv|K{bR88B0SYf=js8;g@z+_l;ak?s(c{(&) z*&fXQA=+i;9;BC;qRdn?ZQ3-zuqO}Pe#htd@#9T$5|(-#{^v8lAm2=%Hq{iC*rgTL z?ZtU3l?FY1S}~MIoL#)(_1v%&;H<}U9tG@uW0xqwvSr)0ZQHhO8>elYwr$(Cb=tOV z+vfDWZ{DAnH6LrOT9Hwah|J0zd-EYeDX3UKm5#U(NC2V02sRpQKdkp9)|sit*(DEW zy{K7q1EA480U#AuCr51zG6O^-3}#phfU5R(x$mH=&7z<zitfZr;a^Oua|TmeiNy`DV)4Twys~M5hKHyl_8enh9qv?RhSSB_J6!xQlhP=G*V>Nn6 z=UUe6Sf8^$VdR)KzBXRka1i8?th@vHAHPhh!xt#kiRZz+p2x}(vvc*@%m@gL=yTAa z&Tk>w9LG#dtMWuXRP`6d<+vQK$nwe$)LTczxd$lSsTK>1(C8*POB9vGi=11swd%eb z2d5_{1W0-ny#JnWnPp)OnBdz+Jd>Ug9~SUF?@O zX1vsxgEIqR2mIrye(;!gG&^S~n*yU>e}7G#zsn8PU@r9NQ{R>gzPdTY?QKq;msy71%Kc;Ox?8 zX*Ou1Wf8+`5fT`imN@+xxE9v3)Q1|&!G+0I<3pT9MvSQJ)U?(G8Vywq&0!yBJVB5= zii6V&CSd-&Cik!j+-i|?9|7f!_x-eHM!bO?Lw+_&T<5bpvYcIQ^8ytlGZK<~TtZC; zlRDGjbPqLY0weHCn2oNisc9Jz8OgX>$?z-V#-dyz!)hwW%|!d-^Dxqrc62cZx%bYn zdT~O5^P{Q!AT<%ry~=>!W*C!5&k>S5jo zu6166OfIEt{LkxX(8*NF1$EMifqq_RBv^)ZW6i}$mS-ORI`U+WqtpI6aqERkphTra zQmRDXb;}8Us{J(3jYeXDIF{$NsEf%iI%MbjRN8SrTt7kR5^Z{da!{X6<_p#F{6#)V z{H9A|VRFFph#&u_^_@^*sPGEk0}J=TKF+E-WPKhQusY(o8GM1l>%^&^Ca?;SKq%WrPt^7W~f*NyM6{<7exP zpLyeDK%~2#@1d3^yc7Qd<5~;O1yX+kOcJNp>+O_n+ZZ@qk4cxz+S2?>TzaS!9lN%1 zpcn#orDnI9E+QZj-1U1emp|PbgFjIm$;Y)@SwD_Cuio=&vYA`Fu=!L1Q19hZFfs|r zUR>e{<>M%k2ME4P2Rhk-xV|0*`1K3tDcWz=gtJ6^~JP$#ex>66K;RdGZ&^l@K{oTQx_q!yM zgBNKX=BI~o|Dx1fV>VnHXYK+m#pnW$10+K>Fbt)sRiv5$F2=+^vT^JB2G1iUk3ZMo zOqNQ`#~a4Efh6D)v}s4u^v|9Kf5G^d$X~X7Vux?sdVS`XkrR4!d}}Ych~cg7c~*>i z{^phPxEBF-K3}1;_yb&GAxRCEdc-E_|Lkfv*j{O^o%jJCzJ=PR`0_tx6&ThzBL(t} 
z>S63FWR|kcrh5Mp%9Y4dRwS+Q^JA;i`%=Vi`i6SljcV>+*d&p*`I-O9w6^77erVcH zI^)WGO{CJN4n^`0=A`?q+4Rtl+O+Z$NTb)rNBaCtQAsHR*2l3E#q^QJSsYUuHKgxa z5HD$sB$guU@_i4+wE84}tebiUVeYDk} z3B~_bO@}sg?6>4kq_%5QVj{fQ&(r(t1NquVU*S41t~31efW`T?s6VP3I~DI8%OiN* z_2W}M4WQ-CL4=B*&{1qBads$-@(=olQGJ^4PC&9Z6{MYW%%GY%L@22f&iKRRtbpD0 z-Z}I6M!t)-)XRZB{D1>o6=wJr#-n!A`Hj@^ea#oNODVlIU+tN;zk!BN6a)HbXet~m zFtS>X8del%cEi>!V0>55!dD=rhgY2J$|*@jHiM=zl{UVIpFw>3by!BXaF|^+A0K;KUC!BTt4(z!BV4 zV}D$K7ld@+g|Ei;ppb!bbfWo-pnoj!z1^LP94#Wrb-J<&jBQ{W)@w3UeJTRpYG1+C zTLL5X52_8%9r3sK)qJ3!CB)Ul(~SYZIHs{biO3LBv~mhspHa&MVCOEHmnD+=e1Q(W zC$QRB@k}aJd0uIXET7n0P^BlTlrDe9!A;7D?N6iE(IT|z`yrHOn@M$7I}yQ$O5ybC z>i3+Y@-7wREw0*= z!(uHs_{+!FX`#kSY#&~NJIv%~FJ^^?g9+cVe^`S}dn}UmkVY?og0=J2byCf>?Z0v- zU0GV&!vQh@7^*cn%GYsyMays%BZK6W9MZE*W+8B#0Be>-O0Yobttx3UVy{f0Gh^)} zLL3AHaM}GqPDuVj_!z+duF?4bozDj5wXmv;u!eZC(4HKuP19E^&r^EL5#LbabKTx_ zUU<6D;@xQ^;j&WmRtCBdY8D0G0<1A=QxJNJK<2we3W#gI#7Q**su9vr9m z^=2pOr#xi4zl+lo{HKDlMn}NPc`#AsF)Q_qj8U<)g?AAAt6js#vV<@2@qlp?f)dK$!DS`xtyJvvS3Ovgjjn|7OW)o%~)9!#R%cjk= zkB@>%vKe^zaG%8zXesd@O6Opr51pgKsZJaK*T@j$Wss1ON`?8n%F!k0ylIa;#<&jU zFoFeqIwF`9T0xXn0V7|8tfw7E4}p(zO7IfHM;!MOgWOg9SSa;_G7|;vmhS~P>Q_HN zORi5bEUKxe(|J3;heT1N5-%g+bZQQrYNN(2^iN}2 z4Jm9nGt_IXPItG8YK0_Dj-fSVF8+5tF zrwy{zZcsUX*c>VK6Ca9|7l!X2DH0Gl?Hwua_g!m8xOy{~?!iP#f_)9Dx?oCdT$EO3 zj@@x^mFqiK{!2BPY(LjHHYNMSbpUn#7=O+g}p$?u~zz3b5rL2DHn=sQJA1LGX4iqcivJ|BrodNL_y=bCqKi(f|UbcQK}{PEGAC zk?5KMV4GsZ`5C73{S^)MW3KBzk+J2+Id<9b{BVMmIZLs-U~=}Rv7K|lhd4O+$7#Ag z4g(a-K_R7OP^H)Sj9guf&SFjYABWz~pN|it9*(d{xeu!#qf1 zBb_;6QVpIL36JWj&$kK6YqKRP>G(|tN~9N8fkx59;jDI0cU{N54Kr6QDa zes^zJs7lvpqE2jxOY+sooe?g_6|5iAw}|?zqqzh!Qp`-zu-YD+!Ux-QK$rhMIJlg4 zWSD@5M-Jlerp$)BlEVPv6knP~y~kY>b9ZQ}80GMv3DcFGDCvMWGB!k)f!Uz4!fl|AujH*fEGwN^RX_HU>=RJDubG5)tPNu{t#czci;swsIy`P$R$ zL?A@VW~coV!z0Dr9yvg6=zB@Fl~df-_La9JN|1`|9^rbeXz;zUI6r3>&eqL6TISNh z3eQWD6B<4CcJ8{iYpG_L$_`*4J48`gf1Ind5MMS>Zwv{M1S=)zk?-{>`9LI#ialTF zL{zESSOiN$M#oV^{UqQQEb@Pn#q(T0`b=)uRyrDf?Wh=aLVJDSds^IMx1{cj+z~+5 z9&lqcE6(Gaob=Lc{{Ykh+|9?vu>v2ETv 
z7*Oi^(}#g@B0?~ZwRlC@T*Ju4Y{CmZ)y#Qi3`>vp!s>?n-4869#{XLFFAWlZYFNnh z;%=ybX}LUmUS`No=8&Lg`Npsu81pU_V>*6CvrF?1^E+Ui3^wRxZ0+a2u@C?`1qJ*o zlTQO`8owQsfcQ|c?Z#u@h|VR%wx4(yLjTc_Vs1~473P@nK*7iW`u(H~I=#~$2GC2f zE)7Z=x)fdqnV9RT6}_fh=%L^}m1RELjQyqJwC{I!bPw_DE-ts4lC6W;S}8<2@0IC5 zW0%bZyqlK^TdYXx zGcJHRiG5M#u1x91sM~?2uw|k)c&>sb;VS&`beCB}$HngjlcRSwkB9coqqWkU)~9}i zzLJ(#OG31E#PNp$R}j!ux^*@VxiN0i9QS{kSZdw`p}5`^I1Z!R>zh1!CzL<9b6CcW zF!Fb?Q?3tn-xw%)peQS+(&KH8oDWq_sPlooJy6(1+W|(wIft6$S}zVSJh4)SAOt_2 z*P!iZ*S?nPcD@fFzol#e0ublf;_G=-jm!%sJGHA5);aE+QX zllP3KzWenfhZ7^YG2AZ`s40cXxW0$+$+q-ih7unsY^cW^MmJfpk!pMKJi_k^_)9Ax zjRJos<8Hvj(Hsgh2F+9_hEnxOcld2uK5r)*i_Fgc&Tob|t?OK@C{<@D3g#OFd|@_F z(A$G60>}o;U`VJ-7kbq>Cx7mj@{f$TOegK=det13>}?17bRr;NH;0;n8y=%Fi&5*m{NvRE;zpzdOs{bg;y{;`K_&W{f@;kJA-j|k6 z1kD>&rH+ixFF_yOiRY$B)BjXn{yQ=8>Z_4Ysn#R){d=#=d9}(*;?EB7%Q?w-3^7*0 zo||&+9lO9Z)3y7D^97vzfCx*+IVjRXsSzo=aS{hI>hdzduahtK;SB=-B&+#Dg9aM| z#`?CL@a(EU6oCAZG4gVR)9%{kP*3dd6dVg|62uKv-ujEm&nU~!q464C+j^-TG#y<3 z^F*5S<;vhlB16mJnG*i`Qmwess>b%_b)sEty?Q|D>xf>a2ZpZE8;FP+%=kUS$d?eLKgcS+=|YjVL( zkBA>-tAgxy1Npm~$?Ai>=WxZ|aZ!pJ1hc-`EgNk1QDgaU=^XEG()OSH5wN7ccgy>&y5r7PGd`BOShbwZ) z$T7pX_Y>Pm^SaMVuRU8YQJ=S&;`_q_UaP1AvJhk**VJJbDYDqhqfaY4v^_ni^MG|J zHI@nCs2WzQbVK|E7j^X4rYtH%8#vN1&_j`{b0`G(4~bpRXRu?gEMFskJbo9fO4^gX zuX>7JVh7I`9`ltn8|f@o>sD5~gp_4CnWRsKBjwHyQf_wn2BrL4MWWb_J1E@X#AXCnncscVIP;{G#O zpgcoAnpO_KZ0^cxL-M@P!PU$RlEd0l{*jg1^$^^-Mu8{R8AiBAq|0GPsEED+&O7r}L6rxhI!&1@!0 z;0zK0@XrS10l$|n?kKku3|dg`5xz|*5A(6!P_uMUOJ^Ca`4w0Ewr3}k8RObErLI<) zde{+d2t6PnnjRW^+)2GF-Fw0mV2GcO)BplM!o0@bs zCbHK_rQR)6{A!^s22vme6=u&<1^mAbj^qJcM*cqB z{*Dap(v}MRCpXk)37@(jbaY}`wSENrA3}BQ9)g7vu4>&#=`P#+T zCG~a#>hl%)C0Mp(F&ySS0vw?1@LXpe=`paOp_0uM&eG_lxL|F<&?y7bTF&IXw-O$V z|JSNN&~}tB$iBPcPbNy;IBSKiuysPD9P*vGmty;#DaXjH;+SfaosO11{5yzne6VdwrdPob}rTbM_oR+%%8&$wqp#L-xWeDY7^3AyEj(7e) zE#Ch2qCpeNEy|At<#F-LfBW>mZyTrmXJaxDeUg9u-){M@0WoR7--MoGr04lR=Kee4 zLnwDC|G&xq&i?-=WBFsj?0)9P0y>ERIt+uz<`oA`m|$fv+S|If=l5%7lG^O)#OIA%_cy&x3@U_B=q6bVJ 
zbu-@d#?1+CbvOJ|!2=y818un`54_kbfq21Y_k+w9V5i73V~AsXx?U#3I`d~WjsPUI zP5F`Z6j?At`34kmgdF??reKHiE0jK`{GOY;P?nP|vHuYZ9T#xj&opIAJ>tV!oZH(i zex_h>rRdGsdxk^5IMqYBay#I+n%OG`qdZX@JqjhHU=)eV^c3g$$NG&ccoeGw)+qr9+R8Ge%6ZQLGH+@xR<~hyz8DcgAABxpm~kTS)n&?#lCx{h+HPB&O9`(FZ1pO zfysAMXcUFfD5?*?LvQq|2p^Wt>4E;Rcdef0WzPQl(Xo9I>kQKK&j!Y9k{Z^`mz{#a zYfhgJ$9!1&C&Cq2kNAi;OHZW0p=DGvLdOguB2LL{^(~GD?4hIOJ|FGPMgv7|$jOhz z>fE=X!rKmXObO{%T#wihsOd*cbq)K$PW>aEsO2GuYmGndYVxQ1BVcQSyqo+_$0xhC+040*d6Q9%?6z8ZJX{y)w z82KQndRB+jBVKU#c!qk2%XrS2R^My#>aqm3?<_-G3O)5{xHK0n6uBweq=RAfM`ir_ zr>%oggMb>S-~-mGoD7xU5;?Crk>YE+KcsK5lg-!9Hv^aq~>g)#30sM*-A&VXfD7fqVLbHRfnThRs;meqhSdTzM zRPw@;5x)Ru>DbW5`Nfc2la|TP`atBUEtK8)xfwxMRT#*uz>9D1h6d{)`I&~l0J){e zd*V@T#6M4O`Iv~fZNyxo{kMdJOQ5RulDW-wUCB7SGO}C7@r(wP1W>tN`<{gNQJb{r7$8bAZpBW<1W4P^V9=lZKHcjkl8M*HqE zM&Ag1F3}AkepEe0Qk$IKqWyVh%lU5a70v9f0^|@V+laGDU)?~Gf22({iU-pF@^i(- zFHhObFE1CT_{B%$DXpYano)A?M?FcaxwaIQk_0yfw>kUS&xk4$Xq}4o1N-|~Z)(v0 z^Zr7LoZ}h+yY>km926OwnL+Q_DKrf-GF}`xhPR?z_Wyzq%IZ7yg^iA$Uz&sc)l!2l zJ6J%!NbC;~3XhFcUf>1?$c-xBpWohl#I{XoV2fN zwytZj<4pLoy=J=f zlrS0y1*svwM(>@FUxXl%(LU<#tr%WfES$1jDI-^OoX{3v!JEllINja0vA*LZq zJyHaKGHgmURlusB6lI;8k*h@H>G5Sh`8 z#CCZg6_74!-%p`C2)dQSIyvsH%2MTTo8@LH5k4q5=O)EVIpDkSAB_%1TMG1PW|hM@ zLqrK%y%)@EFN3s?W4=P=n0a}G0S#SBt(Se`gO0LOyv#|22kda#&l}Rqls^7Gvmg@M+e>+d zJ~|yRF)e#^cmu^)SypXs$MD=O)yvA-Tad2Ob);h3Ev>~>UQBt0bq-5)kL{Nw;5Hsf z*c*6sr+S2E})q5X=4^r&;-+=h$ixtG#*wp5?=l? 
z6!OT<6lsZZc>M1p4(xrWZM_ABdm7T`=jX|pwq1fh7*1!+ZliABHzs4UsAnUm%z%wp z9RTxU!$(k)ed2{rBO&W<@1eMrV&ph%iXhhpxr{!L!}dkiQ^7*hpfYsIWsoU>e`ZMm zDnpo>i3Tt@GE5j3;?jJkP>>H=pXMQD7CmXq$g-{YZnb)bh&$FnPPl?^x;3Ki*cem_!9m&)HqBWc z8R`UcC;)6*7)wGxp#KW zdC;cpYSE5Jd{w}s4o96S7vpwWuB^2WPlDu;W2}X8#pMQ$7Ew>T!y9IQ@BH{^{Xp!d zAy3CePV?)V$RElAuN5kCW2;*)5hoU!gHlTZ8>7XfBlA|8$twP$ph5 z_feG^AF+Jd`$iAL0M{o`ItND9LoYaBn5t(X;duDrw5f%;WR*#Fpj*wRR3O;X4 z{x?z!o>;g+)DjPfx|zLsRQo4==9}zdg;iQ#A0SG&*3%hF;(APw5IdSEG;vw7Tx!5pB%;{b`B9_b^ce187W-mhzVmy zx0IrT{@TgDA23|ZNFJ!JpFfPJsvXig{`;HdEaq3S4<6YVz9OBp*_*r7{jm z2-Yd9$RmDdu2CEkgAB|W7`T7Ansh<={D#C<1}oOm z`aH~nZs8zl$#_UJHDU_=&>LsUei1J2of@F!nJ9yZU7d0gYaHciSkRNr*96O4m%kgk zHb=cIvd3c zZ5jvH!7^zhtz@ihCimgh6j3pyO~ao7u(X=QqSj7jX=*wu#RxD6=$|WqEL=$XX$b)j zsCYWlEpq9T9f_Dblsg&Ag@^DLh0w0&qW^vr9rt4ONw3c?2y6sHRcmd9~^NW>tF*KTTR7+c;Q<9kCCUKS&_&KLTxt z67fuXjfb%!F^5j`H5Hn&4$+vZJym-8wM+7m#CP+>4P6?9_6EgA4Di{si|+$HDcV^{ zdkUT*GJFKV-VW`CVHETh=6QY-`ql~?{+x6y4S#H+yB4StbTtUFVPVl!v)A59d&&g% zdlhh^Kg!zDfE!;0%G6f?w6!~n9FT|eX%X=I^wK#@LwG)3uN*axK-y0mb|lE|-^(rc zs{E9i!`OT7JH?`I5B9763HQPD#bi(wDM1+2>3r5kl{gZ zBKui+8^>ndf9;HYf1pXyATJ0efZ|%04`^mHS zh`Ed!2>t>at7U;E;0BP96HEt&#eTOgl}cJ#MhCO_v@;y(x&=Z0Oqz$m7^2%WVTiu* zt8k9lUb??RBMGR#K*Cyim$V6?aD{Wu^l}UkW3rIj=97OgR(=P#HCS53{A0dRb7!G2k%>sw<@*R zd@NNG6tp5<7x7py2xpI|3`z^krW40zoS&IJaZ!sL3N$#}CaO9xDtMGgih&GMR=zr~ z#PI{lpNb$GnE&Yh*3UH~b8nTx35C!xbLy=lT0Y^TIO`->SG5w`$ubPeE6PPIvbzm9 zhW-UW==rl2s>Ev}L^Kz4QIcnnoR-J3Sd+A)=jukHm~@lmRI?OwAPkP27}oWhu`|QcP<{DwS&LYg z>=K-Oo_}cw^CFuU|=! 
zuKUA+H1wT0_UwEy0|`ZYkk3Z~O`Iz4!5DJd(WvBZ_doDy777BU71?}os%?N4UQH-R zL38tj&ky7MwRpuPkc)5yErPm%r1`e^-COrYe-*A(gFWNh>wS{D?k07D$ffvSlK|S& zc?c<0J=d{{;d}?$12%Ci|D5|BRjL1~Nq5^;Cq-4JWMm{eZs59&>z(A%-K`E{xQe96 zU7#hUq@Z~ihD&&Oq*v8e4jEvfv6kvb;p-QFAxNK2&tvAeikKLRd{MQPy;T1LhT7)x z)F!ljHk@xif|oh&=B)Q3HEb;$f;m3}?vXIB?E0G4*81~TJoH#rb|T4@;yVzI4# zk?WeRBi1s-bTewkyo&jK*v1?ScZE5!6_2H|*(9`6nWVds;I#9mYY9V2T6#8$&Roqk zzC@`*ex2>c#n{~K{8>NrK@rYRMx221{4|ixiKRC+k+iFNfiiNTCI^i14+dP?+C}xM zq5#4Se}S?&HsBM1eJ088rrk+~f9i1i{3YF0fam1SP*rP!Ptb;w$`CW*DE^dnRta+S z`rlgr>E0mD9rv9|0ih4~t z$-0MJp;b38o0f!OXMJ=3aNP=gw_dI>)9m)FHiPfAo@m&|!AoQDh#+BtoiR7glR$a= zt;sN-uYE>^!FUAq@QC5oLZp%m$pdce{H11eq@@Y!Uu-t?uC5AhEoc%rKfhqHbhWK% zVqqPNo%u@4^1NRmZ7IclLn^}eCKg;b-Kto+e?*p_cxv4N#qhaQ2cQy|0(gN{ct1-^ z-uB!Ry}!WsQLlhbNM=sj$r79ELjc>ju+n6D2XYU82__*W8#{i2sqB~Zrx7c6(!%{V z)Ji_0?n@(?ywXw!GSE)otBEW!D8lCIAa}(osD?n7?>Be-tb^<~e4hpwz6%a-Y3=o~ zw6jX^-F(g(G=A~X=B}cO6Lpv#%XL64E^tNBiWPW)P)RCGF2*sKqse_CVOZ@`y~6@R z-^T8Ds(VdPD>c27BmzUfM+Tw zb1>`Q6Q#o4N=I@P5OB;%m$gw~a*i*-6`icb=kku;S0(0fVV=dsj+6K2+Tz}s;1FxD z#}EKjukx6SPiAJYNnQ+3CFmv*VEaE`20kZPUvcuT0${`*Mf2rl__2B|`elRs?4z8K zmFu=*2#9jESf_-9h=tEgCJ)12n9lo%ORClAQywEzIHE`biY>08R!cGyxNr==tZoR7 zQPU336|3!vt!oeUOeBq=nO-G9^_OGEL2v_*i4|c&?_uL*!Q7E?;K2f~vEaL~gCnX) zc^bkXAt5a!UY~E0=hsIsY6c7V4+l{(WHq2L0W#wkC<1|Dx6sz-W0>BJlHb?irUNj1 z-YPRPLT|WaP6x}1G$UjT_+#rqif-WkB}mdo7jiWeh7sHJJ-d)f7ok6;{p}rOzf`n2&hNka@c~&T(Zo52EJq~SsBI{-7N2!& zMJSbKXH!6nO1(OH(NBr;#LkYWF9sG+7PIjA;`8-9DZ!$Gw|ueu9Y;*KA3PpGro#>T z1dMO`rrE=H-U@($dNv+_mVe3;qr<2tWct2C;YMNjV*=L4)EpFJhJf!t%msiRGn$EKG|R>%Px|sn&QK=qd>XZ=(I9e5A#WPh{bp^T z4t&ie_%XrdMmfehy&}WVSksf@^2A7uQ;hmLnn9z`PJCvJZGz!+dm6MyBg7dvA&t#1 z89q2@6OmP>*GT^4RSh?ByzjSWoH|Rp*w2%pkLFLhsGN2}TFS8ub+-YyW1i8iyzA>o zU_JtX`?>z%_p{^*zUxouv5)E9{LWJOi@2C#q>cgeE9!&$Dxh6XYbs~C!AB*wFF=B` zgpd+X+1zvp|JB%NB}mEn_I5bK;)K-PN|!lJjF~Gg)`Jp><7Kyb=tpKkx?%vq%g+LY z!v5IM_O#ZVp@Mn7&v;u;e1LtzRF&}r*NRZomg1~{Wqh~}4VTYe5?tiSD^tJPlhOF_ zKyWDeDS3|92SD%)BF=;GXla0{IiETzMUTM;_`ehsw{cKcE&uid+y&DC>0-oz@TmnT 
zHN4w;gS>?J-v(1eJ(vKA6AW|=?rbTjE zQBi>+jNDs>XT1{2Apsr-9r$7gQgwl@2S8xoDS=rAO7;(_Z~;~GyJn=ODcTR z5?hh}0EX>bFv*}H*)2KGpeZ-5wBv-ZZPEx|?|1`Oi%yCxb&R)BAj5i;ccSGgI&50# zXNNaj=3B}JB)Fw8gCvFmE30iBFWh^9N-T9lodR3IZW!U&1X5LQ*SYw=aNr`K+kh$n zIRl&IQW?`XAiSx8liQd9&Y7zu6a!@~wz|{4S|I{3vt}*Wh`?S5f5nemYMw_O>%Rb zS;7g$M`~&#EvaE-hP+3&J*Ip$O?74J--7WZS%!S&c!xn94HYV6vMZvYV$%db9LNs% z-QHTYSI%3AWGJDFQMq8&@#||mh+~Ycvel*%LA^Bae9#FoJ70E`2Kjdej67*c=_FbN zfWB$GWZ^Huo}yu61B4AaZ*8t(=A@dDN+ZuslkJV^D10-|=O+y{Fgki_WY0F~^^W!n zOcyA!JQ$tAaN-z*sZ;wZk4zf9$C{FxD{Ju{7{frh4i@(133WM6z^6hD@Fj{}kECA2 zFKbctTPFd;emKkA#_zRQY5Db*rioDLl=1a-uCrc7ijLPH5K3HiO68Jizj79NB}Q99 z5x4&0QwwPk_ra7vHKXiTV8We+fdxDZXb9AtM}nR-gD8^W5$I=^<>rWmsMqkL3db3gF{@8{x9fL7>Tja#p64@gu1YNFVSg7o0CWs$@Cp>6pxAGeZjR*^LN&l$td z4h%Bmr({Bt_S9sSnfIpzbS0v`2lioaR|4azE#Mk*#$N?zFVDP1TMSI-LIg_cM+k6!nr1UU8x#ZikMx*^t&q@rUZ9>=~T8Gnr z)&Sn>#<(`t4{=J%}3YTC??MzmzUkEA4HZRW@!>)}xIp6mInxiD`) zzdj~03NccM?SLt3I4JzSt3VJ1o~{8c0bM#qRoBGvv)_{;S4><9hviJeMvlv^xnNrQ zs_0WiNm)Dp!msDixfWou^}3)PY;OooPig4-LXH6vWk3%ER8sC!?eW<6cXB0$n@fl8 zf@5*VO^$Di=VRV?5)Ht7<9T2jeSY{*JIE7a*qizl@|G!;uJQB=9ThKVLCrq3wp<*` zm~E+sNnPKyPwj5bUFYvWbxHApo=;J#gr(fK#Z|xEa-l?Tq;%Z-#?)WC9$UKZ+b+bt z#;UIQyW<=79YeJ$D}}N{ZXNHao&51S(lz~*lM?8(kf(_$;FoP%xXk7swh5m7vV%$D zSbpOSt>&f6Em|76ND`SQ{7<&98~d-Rwt6;^5Wd?o>e|KB+Vn5U3QF=RHCV#MpkKCMsXgvi5KR7J&{e(|4>hu5;b!I!u zL?$*=-gh>f&d%*8@N8kg71M3^{hN#)N1QZzv9@8MKdcuWRr-lfAsll1)tCS^rM!(nxm3@D1B{dos6X6n$+ z+T6+#X8@%z=3#2qB@OaTB+Pb>=N_+`aB``WVOBN1 zFJj)$>C%cE0wB-gmPL-&D_kOWv4XPFSDsZ2rn+9tyGn*sk~WyHo?^J;imzUyOZ zkEx!1M?9-={wcj*Z^BhHwej_kPtO+ zca6C&y7#Zl>tP24(!*h$4dB2MO3Eh@9?6HlXZ4!^)Ixm9k8Rq^w*YvT0N! 
za)xjB>81t7c|Ffe*H=>4>qP~-@7q9aQ_j#(+>;+7I({IZa`|`WjpwLyAxAZt@3qJ%h%DRZrUuf8SKEC}ah?i9g2TqIx@*6Q+@h0c%hl2$HTCNR` zbK=vB$Qh&w4TSXT+QqP|+!1q&p!IY8=S8$v2XcQu5)3_S)v>jg)ng%#pl&60@^y z$mG&X97cSeQcGIK_F$8Q4G;80tB?Rp*V5$++rRIfSmZ@T8Ns;2$+%bMzpEbT8GJjx zV(c{+N@eHRsHtgLIYILhXG#N!rp>$^7|q6;bXl*C*m;7Vg_P@ks1`d^A?ht)HXCJB%e5(JHJMJ3C}@{9y^=)h`7i$%%!nFXum4#;I74<1(= z?aCU?iGjKLWz#`|j&^fxlrN*cD7m$ZOa=rN1-+=wSg?awehRwJ0Fcq!G zsD=L>^qkeB!sdlL$5Y?ZfHrI$3JQK5t9LowijHgIn09Ol&_j$3Y(4K}MV^1eT0rv< zYD!>`mP|@pFMCSR$nF5?p>jcfmZw}2a@~hdpXl8c?-lq8yj=-mNIX4&ln^Flff2CR zfQflUW^*XJSiJzgMkyQn^1@BJJ}O*JqPT2R2ne7s<>+CGJ!V^eVsdJ9HXN#DHSKW| z=GLP9dB`ZG)D$f}$OMLk!EjViKZ{|Eve8k$5o0{pKF}|6B61F|j237W|DB~|#q0H5 zSxZh-wS1grGuU9fH*ws_Dji#sL0Dn(HL!&-+iEIz`YpdCUBjk2;*6713jVnBBi}r^ zCPRG6ejIgD#@=KLF}w6toMfgHHQa2#*ObDOz@&QrhPfF!N6~C_Y>H75Qo#qq@Z@k~ zZ35ocBmH;{%c?AtHggiYmdB?1_HB#B*VPp+ZPxrADndq7H8^DJp?@RAaMoc_1{AM> z!`@`3NABV9i`{A+X>v0H8tRIMhY+!`ENq_0SnLP2fVtcQFM@=a78-X1>U9G@z3_M# zZ~9#kZ5b5xDJL2~<1WykXqX!fn8L=@v#Q$|OxkLwx8c=imwlRJY6;Iv|*w? 
z+nNpu{;2>okgBR@Y}2C|H%#U?(naRIPyzRmdAo%Z#rM>-bwd7oZx-ZGIG19~$#7fK zJTLUaaLgJ;=;N^E{d0)?vM?6Sy(vS-j8Dorvn}VrxAlSZ0=e16i*(-9FtHQztA!SIVt*3m zt-ZGzo;RIp?YTmZ9PC5Q<>U}wIrYD4=`2Nb1=OQhRO0Go;5ZnBnzDd=AG97KZ+vAJ zl92x&08&7$zd&K7 zra-yaHvaRC_i4}KHvniU*dAx`VL6r-re~A+!%wZ^jdD27-*leFJ$dB}a!EML) zqCE18l?f#k)@mY!pEJuh(vE>1;P42oLo8Qmrug@o&FkiyLnm8)o;0!CB>LGOd?p`* z!lBAySUmV76@yoe2TxZ_uyy{2mX`)hsrI^+-c(s*uXqX3&6vYry(?j1n@w}Zm*>sNHSxP@yaZp-5C==Lo(?ESZ1 zk>C9uySWW+&VrctVPj5^7uR6NehoI={o~$;HE%&)jx=oI6*fNEgdNJ8;JfXySLCR} z_R>!Q#BqnQo_n_&kV4oTmVh1YivjQ>6#x7{I^-*CHR=cY?b+ZNm(j(bkm$Loa^r<`z>>{c^ZrWPjQ zvM}t9<`#aqhPGZg>a?#*Ag)k1)pa(llRF-G3>)5(q%VL?K1c?`we^n!)5nfr(zEcHeki}V`CV;CRU2w$QgFy7q=BiXtS(cyH@cNUSEpMYl+xY)7{r0 z$IRM?d!s;2IKU0U!!1YD*Y^7Wo$|_)PsqtX{i95*E|-BfbI!R;aPauDR_giOVXK793r)@oY(v+WK~xFnPMHSh5grB0v7=FR*DT z9p{P-?lV{@_uc=96yYmv^-bI)2KcCN40im}y|rkSR2NOt(gN57iB`aVP+Equ$!pVE z`RN&_X}SvBUbk{JHoT=J<9i0&#?&bnUVejo>+473s^~;X;-)p6I(dKLeEHq4PM6o` zub2F+G}(;y%NtjX^o{bjYwwb2SDq!Akh&x#sfr5X$?b3uUBOPd^_FYpiklvmsk=;+ zMN8Kpu3z#?N^t93oh0Q{%T{nZ!cvI|d_`K!7mCp!Zh$l9c4 z?P?i6af+;Z{~metnUmzSlaA2)7+5C8sw~(RkLQ}zECai;6B4mwK7_m0lcXP4^-eqN zOHz`Z0KAYM43LP;rNRCNx%b9v<@aI3Zx1%iU3SH_ny-9ZDK_V>M_$0~+9Jyqe;{Z4@;o{J{9j=MFsei` zf=I?gs(|m<7Jl^S4teOMC6bFTQf_W)L_9VdVS(D&q zx$3W%$lXu8B2y+*%VO|1leqn&ta?wm;J4?>!3XXw$=KZ*!W{%}zByMez5G&KhO<-( zbMj2B)=9s*uW z!1q3|5qrOVr2urL9hWcj2Y-$184?$&;#N$+A_Fn^%ZUzm1RqA$e@h z`*O_8-6&?XE0CI&q71+v*_F?2xgm!GUf`!o0ouGVO6B-Z2dSKW=;*%9+&x-Oglj*EeW#`$M=1Hgi*u1u&PS+-TFbwA?Qb&uNtzw4N zuQ6B)Y*}uOKiI-A&ch5I+=inr;n~^UkG<;9Oy@j~AJ}8YbK~W9rdwKC;LO%jP5F3d zR)FN1txNSiO-lfpM5~KDJFFbXi6^!czhi*@>v64V25o_zjo9YJ@(Cg0cJ zVfuT5rW1pAj^Ev{u?3pmGbbIhr>4)(2CY&5f^il4{^#2sF=nO#@Si%`8PeI^yTsq%hcKiNby#4*L8o?e3PoVRDxxKgmjo=?M(jZIrBB2 zW(oh0(}HHh^8c8sTswQq#9jc>w!GTT;^r=_vlA%d$XPMo>{3RbS^Ac>epOdhU?#s%gig-*Hb$8cg7+iB$7q z{_)i7N)Ph4?dbN)e_AAq-G-wbBB0A}{buDma}*}mrMdLLVUqsoh*d`}8qabE|Kai~ zGp>ZP0BiG!X{I1M*-V{&kh$m4S4<1)qRU8J5+E7yXNiJ}>c+mH`QViY6=o4k;%v7B 
zET?5-94|7>ys^-3AnhYjSTTpsEVp)eszv_MJfKck);5f+u%KJnuVZZa*X8=IaipoR_Y zhX`q3W{%s-TDvmiz-y>uIra^(ysPebS_=v3T40w6wJbB|o%vk{Kih}$i!E+U|LmJf z6+Zhbm%svO1AHRf7RW+LWBfS}g+1JlXX+o0GM{F+Dt>MdvW;jLjPcgJ!Tj(W2PmcnIUT1PX&)&5Lg> zv_ci-=zbRz!4+e!R;ld?;dULc8y)73D28Omj@OM+WXLP;dMVtmgAfu#pQfumHIbvg zZ~o96f0Q+i=N;J14SjCto7i+^#E6>y_P-SmBBL`C4Y;^y1cCa>HJAAB3 zb=J72?0t+`(V$|{bfGNF#6UFs!Q|JYyZ?JkSIvBLNEPV~A(xhBl9Mx3s7u*C=B;%! zv?CuB0BcOMX0K!F|My*g)jTC77;J%Nu>Y8}D0BPca~4>Ei2S-{4^;bfS)$uCE`3q+ zCSWkB%Oenuh0cbev;KO&-Iz(TaJ`gIl)Yuuo0=yF^i2kB{MaN`R8pZ~H{Jh?(uczX zUDqW|k(Yy%P=La{*W7pgA2e+#2R1Ny&&lDouz3C z@?omQ(o1|Q+7xvqtro0gX@txH3jGlL+=IY@ARylJ_X{;$Rn>SbyJTYj12(5}zeeI7 z=Dk(0zrxY!BD4+pzZf!se?^6bFzcfp7%b%G+dg&i)pwhAf`ud1x)d`Ys=Q5Hfh5{Z zcda7DJ=b5PWpnQYs_yhtD?abUjLk4%Qsay0S}PG7;Lc#GE*Dil)s=>-v~tqc2+X`HU}|Y*yLrO~?m%fHo(px=ES?Kb+>c>^G-tnU!Tl zCIbW7Y{;P`)H&^QCz>@)cJNLVwFyI%DLtkYdqT#69^y|mNszy=hnRe48S0U_%d`wq zIn=z;{QUH9EBq-FEA?4e;wb%?8Rq5p)&QQZ58>;w9!pY%&o-f7BO*^`7{+afO$p8D zxlfq_Y^I>}wztSDb6!f$7xThq(9zd+_y;nY-iS9lI5r{S$U` zJaF#;ysU!S?g4M(MlOipyg)*%;FvoODZtF%x*8!xlAoQe*fOi=HvxE!D|R73*z4btjg1}F9u+x&#V3uu`+!955V%YVLYj%4PRN&`Ny#7l^BvQpViJy zVZ@*Zx$pwqcrOH$j2yVn-gt_vUcE}T;OgoY+*9AoE7W`5ldg8VbVep%nYPp*?jmTG zr{BTZ^pYZJ@4%81@5a!Y!8~1t{D=TE>8Z37dmPl*U22?8~ z|C+Z?L+rMFg0<888lyz|!Ua`!8XalLSn z;0Pn;5s+co_G;f-uDVR7m8VEw5O*(tVYuLE8=fH8Z)1d?9iD}5u&%ZNw~Qu%|BZCv z<3Mx^<}Z>iG8~Gfoy$)q$IF5R@5}S^>IJ6{a9w$uE>UIWW;2Uia{hTznu#j0U9_&` zc#bI$hI>K8pCtzzxSyt7Ut2G2xR;^@%cR{vt^Lghaqoz|!I4X{q&;OV3ASw2TB(7r zI6n=S4S`>9saDG-elww3##c?$mhWm=EkFO&xpL)=_sT94s$}ulYE&S#q{~V_skqC!FY{Z~fH@W*njXbyWQGl&R#v}9Cb#@FaWn=9M z`4Bvlo{@z$A6y{Az85f(l%6G<8|&~ld`}kr=9vO zN#&(7(6x|ff-l!9gHbK4z~vJUys}D)Ai(Ou>^|InmV%q>xB~p`6HXA^oXc#wibvSw zL3SD{zaR!0xTZx)3W^oE&OR7D5zlpSy|E0T&CR%puQX3Pi^0IdkKPu-B{R@{(%(`i zD^{)2yfAH|&KRh3bO1MP+9bOcOG2PT8>&(G!NnX{htypXL9CCj-=GngVk&Nay^0j~ z>tk&n7Z`#jNKdlsAAb-8uzKqs){Pe~SgK*Y7<{k-Nw`R4bhe-_tewY57HZ1NRnTZ)7hG?EaZZCQS-n+B z{U4qtGbTX5;&LBfEN<%KNCf{4FL}a%5VIiZ#+rUR_HWQ(#;Zu_Tovzm`FTQeu^qV1 
zi$f)1+Sbbpug+z1?7QfOBJ<%(aM{v{%ZW}s@oU&)K`!DS@J3ly`?GEE>I~wNQmg=0 zJpni1?+mdG9XLcx(%h)@JNA@CU$0H3*w1GY_4qu>Dw2 zRj%MAH1PAyA)TaV70JoRVQ`aPiYt7fdO^4NpkEq$Q*c3xBNnml$_%8g4}-_L)yuTZ zIK7b&DhRh_u3H1%Oxr_BE3+4vBVNExE;IvS~xD=6iL)YT0*2 zr4CNi81E12uz^j+wO@f!un3YeBRSTJJb*uZtK+RVU)RK)Sc7I5mO>;MGNB85JJzbsz6bEJ3Y|^NYDhoz5Fa2 zg(>sH57@=ZswOgl%7I3TPIcSH>{83uJ`|4+o*|IMi@YIG1X* z+OWO$v&zv3T&otc$L3tS;nv4(!nisv(3)qp<0jeyaBZjkkF#5@|BY=^SIk_b3mk>* zGe9_P<8`B=4s$Oc{feMU-`@2%!SHtt9|-t&L4c8>o6j`FBl*el4JE41s1ZY%y|wsY z8p<|d4e*oC{EF?h^JFH!%r?0NOtV(8xLg?T>ZJ?qyFdE5x1<7t8Ov9$F-=%X#*|ud z@hC&ZWI2T;cE;xp#uO!{R#g;^x+(yTBmS0i(sty!~-7$r*XZDuOStRK?GjathV z&b3FLna#`&Q@pLsK!Z3wi-++~s#2DCKw%)#!g+lJH6Lb9OtY+GaidLt`p=5#H@InF z4;~7PaNF)Q+3RoGT;*OOzI~&|pE^42#g}Gz{7!@qRBFtcRClxOw%wi^#2^pUsYk?7 zHH3Kd04lb-*{jp0db}P8wCG^Ftft{gSn;^&nxC_wPV;Ou7R2v`K?U8MsP^!j%ZxA! z#{0L&V;;>bR(s8K{jyw3^D@#_VQ2_ZK+H{`hj}zFnPm^&^$0KIpc~KR5_t+rR$2-(awauv$*0xc`TXl`L1Mxn3zk- z_?uU(MAJo3F+B>5G|9DfOY9%hpYiw|8&|^|I&4%)C7ZJv?b|;(pUr&{6oRans>B78 zB}&o?OuEZ(RP`jg?$!sfrSNwrnXl)rTPp;j`a&A$7M^X(EbeXjCdFn=+^PieyR2iY?-=i45WE5Yk-pF!tR zu39b-xKPD|ZdKH~KJVuO4bfH4t<9_Lwdd7W7}@pvum`cBa+qy^CLH{UW9^6|4znR$ z>u4jJ5T!{%fYDun8`ibk{dfJzo|^Hp7sZZsli)BUzmVBxtv&R|>#QTU$Zovmr&duY z8pOC@fCx{;d~(6jwcJ1d$A_PF4OR!6XN5k46Fi$TzSf2EuB=YV)XkAV3|gXkuEM6k zGk?Ct%iXA(Dq*6d5ENIS&HD6-r&*~_{DD0FhA^h?6VsPa>?=7sdyqZchN01r6Frh} z*3qG{Yc^xvLegn-V;+|ha?KWh?W>pC-S7Tb7ZobaGanY4lRyGFJ^;+(V)=trNex2^Q zCmv!S-FKaBTZ6h3hLznj6eI@4+7{Xyvm+De?i2;a{yf*7wxCx0?9qp@n|H7i10j;G z{e{i)fYk5AQulUikTN(#@Nt6?ETm1yhPgZO$4`UJTMLzDtqO@2_USkqY{fF2K7dh< z$hWzxdVLnr)e3X4J*;xfC?5np5QrSOg7z}`E_ZT^gH|bfQLU}Azs^`;RqXy>&Q+YE zK`1TUO>U@l_7!UYbJJ`N;l>#;F`OZ`EJI~C*WGeoGA zv>LX4ja&VbH@6*nD9h+D;XIr3=Bsw;58`ae1K#J0eFe4wp@G};sbd`aKBFR?ShHrz zDpKDoo0{ALVO?z*0@`ByARZ7UxrVaQZW~>UP1F>Kkpw0}9;`EydycJ`v&hg3q0%m~ z6}~({SXF#R zf>saq9CFxQ?OS4ruX(KcMR=fSZn7GUi^_H_1g9|Gc;gM;G>If{B6V^iM2L*UI$8P(X-G9V!cJ>K-12%+E!A4Bc>&MvilR=ctmDvkf z3Cl$oZr{*E-qluBUh2)z7}r=tF@%d_KpMXLUSrQrAL+f5rkd zW*w(IrL^m;9Jh 
zB4`!pszF?z&FVhIxd`t0IXFG1SXVDI7L0HI&?M4kKMDEU%y;1R-wFW{8VyZHV`d1t z*S&MY8k_mrOt04l?y%R4dYSn7E;z6jN$~}x7TGr)iCx|%A)v`ORC$G@T#%*U8 ze*1jO?`*d7F1g*RvBI@zIcu3R+JQwoxrZK|Zj(+s$Z|;+q_8xA#)vK1-$4>i*6|t8=Fw!he<#H$n!k{&C z+6{pZ5N^#(o0d~q9Cx}<*Dj104d)2de0)A?6*|VoQ!i>)F$B#VeEiUWbcV5}=@q;C z@fny2D7Maxs7`@<5JSQ3Hl-%^6UY^U&1lDy-u}J!xEKxF6*L%$-s*rjbla}rIj|~F7xz5Q2ojXiY&aX zUDsr#`<`HBBiK7asI*EBCxpbgEJ4Y2bwq82R)r;)<_;QWHIuf%;17Mny9l*l<3?3T zEk>LQ9>T+p&QAK$ua9GI3dg2eml>xSjO_{TQr*}379o(?m|=O=v=+v~azQ7@wB(A} z>!Nz)QE}~j?WR!Xd!VEW!>lZ@l|fo9H3|EqdCme#GBhPr^)+ZR5Kbwd+#X^fB&=V# z+Gfpq3Lj~0#8l|t);24|+S)6xy=u=q^|;m5jS+R6|0cdNy0eb%Z^d!3r46jwPhUP=4OUsh-mEaFCbfzl9!aL;*{0D-eLQuuzFw zTdr#CHvZd-(8&=Te^$Q;6k_59CDrz^qmH-xZy9QP9D0o9qe<-OWiL<{dsCK5^fUXz zt-rI6AHBC#vP6}w`}c*bsyt);wYTScoOYJKI%!)G_T^Wig*y1tr`xFFVG(5@eg||lPh}Q^uQ z-6kHfuKj~Ie;#1`n?vM3;ay1Q9Z3gjpwja#Sm3$+=Ep62L@`8|O*|mQpu*t>_S(lj zwjXM**p*ob2~p-Gt@>YPYDwP)Gas5$Y)2e^urvMug9L4niP`Wc=8u~>^9|x1WW&fo zs(MBNGlz}#8#Y?>aB61{JWLC&s`39v?x~8ppMtwPx@` z#yzgqz{IuPP!4CkUwJhWnl`Dui6+B3E0?$0312!CCJGGYDYc@?j>X4cWeGKCToegR zZCN3N9U+39kiA_<@RV0uS?Ni%_NcnM*24uL?i4_!_{Mai<&0vp8tsRxXkB~=jC4`s zd;&h0%f~>8s;iGkpA4^XSe-M@FM7u7SMqm$uOWULb6_Fr*WgDDyPwr$82$FQ>&Hia0J3o6o7?*YN2{H3Q&%q&h*3# zvb3Z{TA`?Lr5Dw0Tu=5Ukwz^ika4C=WGLpr7Z5DUD{VbSnUrTSOn(&`vmak}x!r%` z6?WOxH(5zZJkQ>d?5+6wBY(5~clx}IL>N$3%2H>{HMU$xrIOBYfTJV5*P7t>aV~cC zwAlPbb3JBYDanTbS@)ogldGFJ(SK!aQrY^LhWVuN*e>rHg57uD$<8`*AN$P%)0lPA zrcQ18Yqv2N)q3)w8*I+vUs?5pe9{9mEoTYOxWD4@C#QP;>(&Xk(fF`jpb@?EuDftA z1m%J6g9Tu91P$+DP-}DAyg6~{u#<8WrhNgcnGGEL=_A2UEkuZUk`Cud`IVp0QwFf) zxdH9ttBFOx4H#gOCPK!AT8UR%SId@}q~`@{Qn##=kOl0aj~v!;V?S*e0-AL2PTZ@2 zK&V(0DYOtJmC)}+WkF<|?Mug^2=YgW6%8Xqh^b{hsrc2cs$&#Tdu`6#MT#MmB@L67 z0v1{TRYmO%cKbDF+SQkyXLmmOiWOie$~SBK(x=%Ie|yZvo_Hj*mCRI#w}A)p$P8Q0 zdF#Ih82{G8?!!w7C5ZhEVVAm{b-9&n-dn&^UC|Jm->BiGO?^dNqSIpgeuN7y9M2gZ zV5?MC&V-1ii+nW8^fg^reL(?IH?C<_yGg09iKN!8Q>O=T zvO=pJ`n}qWrCAVe_)raG4J*e<#=Z`#2tn}x+j|f?vY3J7Aeqmb6`8L@a_y2GgsK38 zepa}{VayKHTE-OV9I^eC4Mi}UU*9vs>%3NLfsCpc)2y_#MoMo68*720oG?_83YtUl 
znq5+1tzma>uxF>gj4zsZEb6IhgE{daH-0=jo&kanTn%@oc>=fMh#JTB#ErI~F)las zc1xo`y9$isYDTGfY)%5 z$MttUW)mw>jj|^rhqNL%5|pH+P*!S>Gy%$2HF2$gAT6N+*meNJ3FAw>y*>kRD%FXh z#MO5H{kM_<`zh8U*3qUxSOSyNMlO!pMZ)DhCtnqYqp$VUhM@Pi$8L77R%X_L%X{;g z?aA#pWn0HH&5A|fk_JPxbwLoIhDocoMWaV^nafH$P$%y$TfqqjIqrh+`s6a(m8-3;utKV=9!jS# zj%y&BXpHM*q{Ynb;8ev5yZ*k{t+JxnmNg0g2D#-$9&GZIDRE;V_~@o|(dca05b2mR zZSot$=>si(RaBQk=Y(^VOV-lH#ClU2#2{OHLl{FjQuia=_v%+dC6!cr{W2u-%0?A2 zh%hJD+G_!fUV39bzZw^@Yza6UnGB36t+Ra(+~0az&xG;+x0SQ5KL<@wOY2(jcd(sv z(oweS9xb+e1NW#W(nQ?UNSUgrkmdurND)3@CIiX;zlA^?p4tE>1(*7{!)Cwsk{8m0 zD&MgbM>rX zY`(`;U=pgW8jWItDx9wkYnP#h)wG+*Ftad;4l)-kS5m>C=U39Wh^c3o{OU)yo3V4! zrak>UZ|nxZ)>bk?8ZXA^KKl zyB%;g112vE4Pvbb&=TcvM(AdFJa$bpmc8=B7z ze74di@B9(lf6wi_?H;}rU+O8te}et~lbA|D9^39ZXUElv~& zRkP?6my;%JYK-yZtBsW8j|2e5x;Chf4hUGd#7+E>2!YxYKz+G13?t=d70``-7zZ4ol^s zvcwnzLc^jZAPtUl_TU8!9uCO;^X@0}->Q=Ku?RAv1Irh_WiQTI&C<~jm@4b>{Ize4 z0$D?SwZ~WANhsCm5aW(iM(gDYQSk_V{AzQaoKCzX3VQiE)6=OMpJ}^~VyPzWHvjcF zCe?4sNm>mZsgtYb#z4;z{D}0d3%bEn8z)AcE<5R6MXL_6M)Hx95Ug6#Qo%@Gnqc_m z$XKHHA>KNb5=SG?+JmBfFOnMD*V-#HBmL?|=moMY*~UrNq;1A~c?TVQsD0_P2inzt zc-*SS6xm|NTS<8h+Ovx~W`^Y*=}4gw9i0y<(#t0&S8r{sN6zy= zF;*$~wUI54V_kA%*(Wd@VxgIR6kW;HiMdCNj}V2dJUBJ3bz>q$^Q;-GRO|gWxv^|c zK$c&{`4AKSnhG`*vbjzpDPOzQaC_{&yA5NUXq`@CjtG(nq3SnxR^0rS>jDN6HEQP7 zE0^1LyB*CL*it7ZkY(Bc^U^?2-2^vYl#fCd>j)%gv`2G2>sst>3ql7`+iG?ls~}8|D!EmUt9N1+H{L+jH1J95$K1$o z5PL1^@R`pQz-q1P=Ddnj8>7+$SVOF`<2hu_MztSJgeRv*WD+vVTBK46A`UpdrCCv* zX0uTUg8`IB7=oG32sh4~MF{Xkh-+aDItR+9YniQxbHnM5e3y=hq-_055BjuO)8e<~ z$?QcN0ukKUG`B>{5`)Jg42EujV5PX(;1Qd!qEpKVr9EBLQ*V*>rQ|=`%Gfw2r*rJm9nf%p&*O-u@^$Ak zO<@4Rl%c&0de%B?=gLKP`PDaCYZsa)%_b3;bC@YFUD;}9pLM3~w*7cOk8t3b@wKtV zp8oqop6xoF|HdmH%^4!B?Y8UoHq3`lft_~h$8PYH1X~yqNd=at#;4%R)90%H^pI!U0?%M-18TXnX9y zCP$%#z~i{1)~gJD={IRtFjGh3Je$j=;p-oI)rw22ZQbgXO5mqfv@x`>eqSqNIZU~$ z-@MOxS@8B-DvlE^MF==6oE~Y@p4k+eP*)mDq+hsd2w^$)Pqt4O5D^f%IC(ItTKuD($B>!bDjHI1|FXII_KrTz3E2q1V! 
zo7%?g>hyEIXp`9|;IngSamb@>)gpUiX4HsuC1?C|)#tdKcbw#XKDe;LzWTpkbH+ai zZJ0`HZffM7j)`{dPcO6s4?V(;KZL8qsa@1pDYe%~4Z*5D*SJo+6RY zo444D>GA`ry{^_;*0jtf?RBJ;C6`b*+XK^GE@#@T-L0AxxqU4UpoV1i5jB6c6%G*zll0CvOhtl%F0fHj`9&AF3qQiG0D`y*qOBRoI#lgu zm0Y?q#s|FC4gCTjn1eGW%96`w*0>fbI-lo~;RV*rPIYxiN3w>h%RjuTPDDhcqU^7u zh>{IP77ep)_ki(FUv7CAVAnnBgGX1OI+)GuNHZUpD};^ed%e`~NKj~pnh@66U+%x# z<}Y23(8EasB%&TNUQ6|Nn6i@{`q5ojuN2Ivg%kcBn>%*_31QsyEv9T-z0RIFHezxw zxDM2B;wVJrIM65=LOU~=NlAE7(9j_$b}8?W!}e!GXrZT$mmpEF@PM>+<6x9HT^^;X zL~W5Y(alb)T@9m|8)4b{!bxqcGtk1pl`~(mTOWHKHE)5RSC-JJ+}DQ@b|QECm+E33 zVTdC~E*rG`yqAv+sb;k)uo%14Fa7-nv=l$F@uN%F4Xr&3h#_=~&_R=t2Lpz=#$5SD9d{9XfHI8<0W+5UX{<@WIt zzGlUhRkm*7Vq3a+0UP~B+c(ZW+78^W$#&nSjAa^CikY*6!bh#QPeO?Uu&)MNesTe` z-ClkpGGCpp&>FY?25Y?xHx})TpeFqAyC@j98Zlz{t{DV2S-Q)jeU)8%!>yi%Et1ym zvs0Ka$4oU!4%}rIt1Ffes%A%=5O7=RmH+@MWl2OqR3uk%jL`iMzO?ZzFAwWOjPG$= z3{_azxBl%yxbDtCkktK=x;&}0beyM^ie0WP5yT&T;u&Krla-f?q0@gHAI2&oFoB)` zE3TNdtifHjkRx*6x%xiKXTr0Vy(^jx4@E23%1MSJjylF_(WLnj4r5jKf#@=<0&S#q zM@Gb7pG=yjJ~Gu>7#~ue=1a8%-ssvxF2tkGqA7~s#Xg~D?c&RSn&i>{XS|*F?Qcgs zm$0m{&#+jwbdf#ve4|xWRIn$7W4C0(p+--LwNO7jn?PB`c2#Q@q4U(G~@pV5WgT!xDlM6KmOnyY$+T23N*FhJoB10an1YoJMOe~j1@6^A88C4 zslD;>d@ZNb-gy2AyY#AC7ywn=(6`Ut>Wp$r= zPApMGT&|0aayn{F>qw_r9%?>d5?AFlQ_%QTJ$hJYk6H=M*drZ>-D+gAZvtE3E)0(^7E6+`JFn6$q?&N@c#UMqT*-AhmJ=Q$$P5bz>CzRsp)gaq-S*sJf+$$<3|J8hshQf?z0vM_APS-rCrkh@%dCF< zM9=T|FJEFc^(C=Cl*k!uk1blf%ydqo1huyMIg5T?4Pj@I&FX5$c^#sJUSNlP{4o2k zBX+R4^uuU`kz8+Q6u7SZ#V>3%-7-isQU1Ec4fqwq^(TG(6Hb^a{C#U-#LalG1^FNx zu30jluyQnFb)J(@UHF-;aiS$Xotn|?5~Nu?`wjcy4=%AE|KeJfC(3Nuas+MH*Rf#XL)(V6Rs$SY&HIPFIdhhW>Ews@jATk>o=9k^RQ1G?qgE&kM0hTP#^)dyt>NMw6cfAg%LD5r3h#3&Hu2MUwz5baGf*b z1Lz_|DURPpD8o7JYwWK35}awM=e~+cYnV9M^ZVB27ug6lR%^6rUas~~vMotYMdY(N zs+Us|gV+<&vILx|Jj#l>fK9dXb3kbOPCD`PHimaG{o&jJyaDN_vZmhbwfpVnTWti^9OP7a+U9Z_rDba+{6s!jrUrw3(r4stI#QotE-VFU0gK`$4sOAPCEU6 z!V}NF8lHOcsWA1ar^DZ$dLleMjprwy4maO+L-^TGe-utX^)nt<@wxR+ye;ISdjWTC?3@!k zUR7BMX>-DmJl*jh4eR{%$yaoxy&Lk8A)fUV>sP=lA2Q%+7MukyH+72G2(dd+mVqO#ikzVnl7 
z!U`@n@9f$bHjq}gi0f{5suQn-2X^<~@MDiF*a_~#3Zp6C`t9}(GZ!pXWp$-^Y`b^3 zUuGVVAnMfl(J@XML_dWgLwpdljBX63eS(Tm}Tt(jl4W@_KNV2H%RRgOvlFi-Rt-LOjv|DW;r*MBm@L5uTQ<(<5$&;4THHh4t7P`Vi|Wl{G8VfJ5aIT z1x3vGsz(Hsw5B|6(Ro*qvUA0Y;j>5Wi&l&yjs%pc3VEQXl2-6#C^Qs*A}X7 z)DDAL8enQ+Xj>hA@;{&QxK-E%(U{7?Z1m6(ktY4+*_T5bMj04Zp@ZgO^2*!hHibyO zYT+wkJC!$hWEh^$v>uvLP@wzzE$n*WF=5`)XlKE@UAUtA=)L5QaMx{Dd%T*lwf-yE zRwibE|NFvAorVdBY{c~x%0spVI#<6PKDK8Z3+(>)GK)sW`c6LJ@Gxh2GZ7V+IxD@` zSRSq6w(BqRvN-nVG|aLGhuVgi*Ap){c}&>@NXJ8HZwbHp!I_?>3OwV?e;899Y0k-C z`<0vVRkb>W&Rs&Fjp3m?ul4xjG5Ia=At!fmC@ZgYzv-{cRkg6+;nm|wZy(Gd_zk%K zL=~bxE4h z;e~~(BN#9a(**-^BM{v(l+hI$Uw*>lRWsO_FSu>$oFRoFXRr+JSh(qqd&BbOtHeW` zxUX5cB>eT>o5M~!jq~s_w+A2VH(~O&?sv&Gcc*s$C_{)$iH^@Qj=_lNY7N(Z<4Yb_ z1CEKOGZY)FHn=`BoOQ;R!^Ic>D4h58?}VpkEc9I5njXOGm32V|?O$(*u&cusx)^VS z5kzWmB;KDm{`hdok1q-5pZ~w%);njou+_yt>~dzhA-wun9FS#L<#GZ?*;39AORUZ@ zlMWYM^sVp>EZxnC=HuPyjT@sOpSAO+d6^?h#)Kh5U;@;kxERAC!0n);J{|t@)IY+S z4K!S|4Pj+NYjb#E+N0sr6F=#3s>;h9<{}35lOpVHedtLiG9At-Ht;M4)f48t_Kc^g z8#@*~0t+8GLqbh$t@}OpTEd|aQiNM7&RGpWmyoHqxi3EEam%Wz!=OQfcpielQSW|_ zzZugk&SBI@to!kN`7eJL<}7S>13liwSYli} z@6B-2@1syP1pD-8hn?V!O6;)j@rhGI6X>TVx(r7`iDaQi#!q^~i;v&sdDk~g@O0|i z;Rzuhzx}@9u7~~}mN&2QG1LW4&s+FvxaqoIc$>d11()!7%aMP*g1Y^%w=({RE84 zc?(e0wyM@`yqt!Uaa|8uPeA0?>#2P4*Ejwpql(ORGKYM;M_)-Yd6JdsxKv6zAXh%WayWsyw#e=?XZLKeRAR7(c<&--1vtw%;a!R2_lg8u6;o)e^4w z_J4UCHZuCS6kUjA3vVo({l#;_*Dts<+;-pNBn8tXOkuQ3Of~d2hZ`?F+v7FV0!#W% zbuTGpjPiTzXHE@IPk$q@Su@gOjxrT3!lW^*s(s$ znKd)~rBjwp&5e5(I=i8mNCRxzCuV>e3kRT+yLYM zVDS5JQb+tDZ^%f;tZ>o!kqK0cLZTPr_q|;#(jiHT`IcdEs}#wytgM{bESC7nN_nqf z)>RQ|Yp~E)QUb&A1ARWg@(PB9hDlQ#sf1Y1yt2@laJO4}V#XRv=9>W{!j5ICU;N-} z9)G)uwLVK?@gK9R!$RQ*1_XKRxXW(ggp*DUpZ(lPVc)%W&E#E*RX}IdT!ubjn^@)- zzI36RK6N7LUjk#w_`8#V+8OTs)uo=#__`?h>bh$I4;}F0R_vH5a2L|KQ6wJq|l9G_F-UMBlI!=s*;cY7Tc^7iaz3Op3%n`HUKk1|TW> z@FR{1C!Kh5IOXJ1!qG>3EaP8MDXofjE?8)2h_E~1%IC z-qY4sS0XIrhJ0D8QyJy^h8G%DGr>i5au5Ng1XeC#t_%P8>n$FCd_9^a;FLRLWGEi4 z`{jzl3rk{0MaD1tjnR6U;|JF6nisz0X)*p1%8H_ZiFz4JWRXeQ$jWfSiKo!c6T`_T 
zMNN+CR#iTVnR=e59gaX$H8u*1kIb0w^%NW>muRNzvoKktQ+?Z-!Z*KslBb(gAGHh0 zZ-n|pZRWGf9{W+|=fmed_t_xjw)`rdPCh0f$fFQ+A;@Lbu^%tLg~v9k+46X&ll8`K?|{zm`lGjbo)hRRRO*h85=dEX$3HKOEFJ?5 zC=F%$h#r*j&Tlz_WKDHVsH&_&LqI<<4qS@}Yy^w^!idk8U3GnsjY5fdve@a3qila> zeBAK!$Pnvc+!~i+qU<9AZ}PBO7sspMebsG`ctcp)p;^*Skr&iv^}J`i{+h&$DVDGj z`)5>DU)p1j`%E|yn)0b*kIRI+)+O$4#3XnnpV;^JJn@<{Y^fO{Ud#F;V5QD3;8JSu z%9-KteI|Qe^*(!#WsV$EmC65*Lp~8s{`?ohF~>!kt9nVeiR(mJw5=ixDTw{~@>>X& zYMoRWUXI$fA*^3K!}A-;CWTzI1KidaDym{Vum8*Ij%O}dD!RJg$0W3B+P$8x8k#L3 zLok&VOm+DEpPr8Rm1@0I?gwxKjQ;}w??X!}<|SLH>gqK!!U45HP}E`&oebj#`O^W< zdK3dLe#DTtV?6U=7b{Zna5uwQSyh9n)TnY2GVOQJ;o+583w);0Mv=~|C{-ov<4%$~ z0>H+Vjp3wYKki64dE$7?{EG3iuQj)CR+tW6{c>hJ3?G^WaTyvas%uflHF(-n&ibFQ ztkv}iaR%rkCn+oW(V!4nT)yy4Pow-AAaG+w6@_79+==j0$_!zgo4Va4%vv7nuUWA3 zerNpaLw8GK_{c| zhEE@PKukZOo;r*SLz!WT$g2IdiJJK0D{h9I#X4v-i=cBH%G=S|+8E9|D-y4XM&;Xh`_QeD#G&6Fd>>RJ@V+!hsLEM#a;rXAf)NVJ8oXM z=Hg^&WHhFiVf>@VM5cSn+20Dy$Z(p;M1>>?1o!l#hwjDUx}Tiub*U~d_E}t-{gM@)akFMAG_bvtKaKuYeN3uNYGQY?B?yP$7HRR1x7McAL~mJV#pT$ zaPQ+8Sc*^dF+>$C55cuNtXMQB9I$s3GPgyjDk+XkD<8ARHMMn{!8He*jJRKb4f^WZ z`Y@t6GMVddf6!;P!a6YuFHlXQO{?B~$nza}WCXHsIwm#PC z#N{i&|BkyJk;v~fAZjHdzQq0)?AyYt(;{3ZG>i?^Ofa}4%9)jKUtm&DD-9cYO`Eya zn?VkdrbvndUV3}>u|Ihn#jCHY&Zzr~OUlA%r!T5^@?lw~c5KXZ6f@!4aT79O_dNU$ zVE^7uOL%f~UVb{0ZUXo>swm=2(I}0@+Dw?r&BrWtZH?f9hHpf1sH>05q6&N9(PzCq znc)CV#yYfySx^4j^QoYooZeu9G=xtShM%9k*84q*&0s(}Q4y4SmWQh@{)(rqLom^f zs^Q==dj|2_DZIWY3X5@S_TKG%zfkRM8gTz<5Ma6mvhl&1-l}yreSt2e7|S)!qpg(j z%;@!D%wvqk&*qMfmr%z>ICjA<)a4NI=%3>4y!*a(;e{L*Ip`289?mtFJpx73JQnW0(>E$@C53hJx+odeWOcOv~zvXuoC;xC7h-hSz+J5^p`~71jYDz>*Q$W zdQ6FymX<@K)!$0P<|20S3@_r20X76JU9!YhG&R|OpZiVw=C>}ga?JR4d)~g)$J2O& zxh{A1TsCL1O=u*$$P0>kFv~iWi=UPn1kJdXUg$*CRE`iQIz4IQkdgj{mtJGjb*T+w z^X1UOk!o(50RY60?VdOk{CI#BXJ+_Y3if**?xx{ZLJ(N8-oF;>KaVa za++ccW}Myj14V&po<$dD7qv<1&9`Q;VKnX%|MTP1?ZA)5O{Sm`xULOelJp{cJRvXw zuvStv*8X_MAM88d{hnQO-OU~}al&|ZiML_)xz)=|&xk88X5-v>G0A4X^`>oTiR0nk zhn}{>4&NuKkvdO~{Jn}|zUEoo2^>?o@m3^|w97q=^$a 
z5wPAZo|b~A!WnXMr?dPMCr!4u-kiy4pj9?;n;q@i>u#_;_SnJ4p^tfD`WR9o`ta@f z%RFs)WrY=R+)?*Kw5%rWltZivctq^*D48$8J2$)7KJ&R#?3{03K~wXse)41tFLhh( zuu1mni!W(bO_a(ITn}bTu45_@^IV|JBU3TWOdFw66(|JbY(3 z;rOHe={kM)2bYHDUwtc1OvxoqY$(B}bE-vjX_?Xo^k|K~Tl0>NuxQTfY*M@|9RI0f zwpix3FS*iZY#n6fO;AQIk4vJ~DrHA#ZC=V6@3rByFZ|aQ^Vs{)l*dhXKJInv0Pxrwr$Wj6NL}Et z3SnjAtM6K;-`x73m))&ppELnwDyinWT6gT}q`yLG!T=4a<^DaBTqfQ5W z^2=elx=4^E8UIutgyqH8!+SKg+C$T#x#993|H!pJ?^WM4?5Iy38*aMgj?la&Zai!w zOIIQ&|Mzs%pgYoomS!|uzq>l@v)AtLsCQaU|KEK-a$xxFwKs&O6>&XpBM2jUBj@b` zVPfB}3tv6`ur20!%4z3^4b)X2@y*lfeIXhdD6Ow9f-?`?{_`!SSAFjK>vR3XdmFulfyUuH8tTP5dBW?012`CpJ!OM)J!eZwl7H?Pqa zELdbuPkYv8zdetc7FVotlT{fviS5RWV9NG8ay@0K)zy@_jOsH6>ZG#b#FJ+)Q!P=M z(Fr9P2Fy0_QJH@HZjh~CyULa~E#WAej?Q7YgEP9_or5?wH^wT*j&_Boswco`wxU^u zpOsdbehw*~!C$nviSyVxLdA@q*;(kwwfw>{RyDCK&e}O%g&nSVnk$+6#N%37eq&$L zvgOQ3B%|v_L`uodx7vmV?iSE&NbTbzJv~nvUAjJ2W%+G#l&}#CNAsBBygg?guzSYl z&Rc*|g*eP^YHDhkVQgpHZ9m1zIchexcB~B<#93+b(aoQE(&}>ck~Fz~kxy@)UvSg8 zW+4}`EVQ=VF&Lgvd4zMBLMJ!GRo868z4BTZq93_3RpwmN*`;#R1iGVTDRo+4ZLEf8 zbK76G)D9?z+c|z%KDNOsMq{m!3tu#Q;mg}bQg^xyP33u?Djhh${Ms7HuaP54)2Gj{ zmtT3ETO(ykI*W7O!)+4Adv@4yC#$Qgk{oH4^auTC^Yxg68-9^~jK6wZ&N>Nr|| z=eX#&OACOT>ZIEg=gpgMbLPyo>Ca8KS#QoJiO!f0v2h&3+G(eqxu$u%m2$SahFcp| zif*J9dgbWyDL*f&aTUvg@!P`LuW`{2G1YF0>gv{w7-(@ujIoCL%4pqB0ca-FA98*r zkIrOLEnUzwYxZLLE?x+vtDL&PTiy7U!;aO76YH7jX?#%+Z-??y&Gd;*I3_RkxcYqq z+PlTzjAodh6($AN6w$LaI*_KK6Gyl9yvF!J%p5tUvc#q)s%!he}dBhb3Zu zstb9FO33-)HIBo1rE<8#pFUr@Yy~znxVJ&LpfTOWofRW$t%|e8*tG)`j0fr-Ejn_( zUJY^_qwbC`+_?e%Ha0fe%vo>R^cl}_B(6!2MN2ZorfjpV?YZaPT;5b+cKm3am#AFg(wQ_MTSpGmiKQXG%#JjLVsK zCUlbr#+5T|W$u4kE=XdC;YpGL*%|9V%5j(zxQSD( zB5rsp;)($%Z#7m+EP|Wf^_v5C_h5P9I`LEg%FnChXZBTZrPswTJc%&B2hK%&p;g{l z&Q8?&1kXvU0jEHnpMMwUy+K_x27Mc+=cS7KgQpT4aWjxJ3-H#(*y9;8!C8G3UCl})!iU& zK#A-yF_3X7{Zz%ZbRcL5pB+c_jBphDNMmWseGa(2(HcQY8C`r6x+%{{c;2IrlRT2d zTlF)*__qpdAATYY3ZK~r2s#6$2cj+lFY9knau(ufxb(x!cd3?NT%s_?O2zkKwn^-m ziv}fyh+AzTPMW~us(!?kr>gA>}2CduFMB&)$ zOh7Z_2&dRS<>n#;>L;xu$5tp!8jXbfr?1N9eVu0P 
zz#wu2Z*eAx^3lhI0WwQNJzr0gmK2Gw5_%X}>So9+#}gQq1TEzwgDeOn35Jll@IVkr zYntEzc;y#V(KE6Cg%|q7`t+A8(v&KD%PvhtS!fO9+((a&vq_J5Q_>dti^uK%&1;aPP4HIqNY0DPv>^I9zS7@)?FUzMopgeGimq`L89#T}kx5#jYU=&woE2Xm zOlkcij(8M(seAcxeDwMV&iV+0-h){ZDkpqXxiNR%9B1$mAe`=HLOh<*2t8C@90Tqn zUp`)Q7fifIogOJXn)ep{i0u?DQFK*al}@3Q)<#&wD9|gxGChng$~BtOwDd9?(e>5Y z@PR>!(+3(w+$tA!Q(KjEAkl{j0rkD36@y2};wUZrl&|BCFiDS&CI0-bUqWt2c~9u6 zlzYq}rgFUWyoFiODR%!jsekGhH;VNQ%rso7gvyTN${|gl2zcr>{q(Q>yOrn3(k~4F z<*q?Wf>;N&FET9mB=cCd5+yEE2Qk8A>?l2qPPY=p3K2>FM2NLigcuaz9Q{4-{snm6 z`qGC4{&7#9^d6bG{3V=-j;6_Ds`yPrcb-Emg!lgWBoXuj5ztI0hVm|kP3FQAmBBaR zVoWgy{mL(X((+>MVzqU%TZ(g@+T+G_N~<(!dcF7M6yN${#Hhqg8>t8`1f;c%3@YZP zI;8$-Y02;A-f2Pa_Uky2rX_Jwe*bLx&-!CFil--ywwPAmDM^;E%sgwZDu(2y6Eacz z#)>*M7T2FL6OruW#!Y%g^}|E`oX;j~cJxa_Qjn^l^px7*+mIaFil2o#v@4G$r#U!2T90yFMFJih_LyzkPZy~p&s#3#8jENBg zn-F~x0M1J|6h?Ry|4M0<%2oWRIq@L9l2y7~8JCw>S|Tt{@uc}uoW2I|vUP5N@oyd6 z{(ng&f*47;1`6mSM2VxpJ716g6b?9oZ5lJf*vLcq$i@7GS$%HPRMFq#_X|iL)89<3 zBDN=q2#u66Q|C=_IM(7Fd1R^*LqtTx}SP}DdM+6Ql~t; zBo!7P6$Il*Uj+fBcROgxz?lkl@XV659!JUF$!Zm}X{;o&`=%l4F+I!0-&oi99sT*< z>^1uLdGmj1;BUV|`aSl`B#p1)Hph8ENRIDJVm)MDlIQsD zSfWSAffN^Ecofke{ywWy<7Cr8TiG0MDcjSjKK=9Wt6!hL!jm%j@h27HQKLs+E~-c0 zTM{Xgo`)?vAF~RI<><+?=R5UDpJML#`a@q?dXk{T4g}99wmasO#!@j3n~X=APb?*l ztXNVyl9aQ4+O^Sl4uIo zC*$o;Y0cvvj&$bFxl#QQD9u3>s@U_AEE}b1JhQF%+LTgmMFs;&J}d};AdXb(VtpdQ zY)xq?a`#Uw(w~z56p8wL-Z|_yPaEIUl>8LrkepCNPa4kEFcR%L_=qlwQwji$1C&AH~%t$(y-VjF!O@5@j0e z5sAFHOAAaffpF(misyyOBl*N0iy>*c^jE&Pq>uX2C|vm@d8PG9-<77HOM}y27k1M& zC%xPkeY3K?o)HuJ;`D#&7vHND-&34-FQUhqCcey8rM;DUT3RNB;^mHOZ#;4gN%BrJ zLDj2UwqpM%y810{_Kf-KTYmA4hs~a&Ps|{F-aL*9OUh02Ny^Vuqkq!(dw!QhX$fLN zjwQ5POepowWV=~<#g6rpR;sUGJfAeNYa*3{!pV~)X;8%Gm`k5$8mcsMsz-!je@Mo# zclnmy-W@`Ezw+djkSi)dIhirrKj-)yYq9y5m2adMFe5#N;t zBm?neBuL8_aY%}`$)2jTczy7UDgX6vavN~}nGlF*(pR^4zHedp^+n%`CzX)k>A#qo zR2nr(yqVjAR~!MEfRtfvmQb&oH`%?EFa~4>YC6-GclzNmQ;_`nk|j~_ls_(#o#cU^ z=q=w&C3)`Wl62808H9b=MQ_E)Bv0Fz@gOkGM8D}LCd~w8qNm|$noZ-$H`9icD5Svg 
zB%7u45;yhJn-}NlG6j1~zP9iiefz%2A(LgAn|?E%@v(o_eTn_4f2zKq6n1?+TYl{J=}RHa5)4S(K#m_h zi7ED#-4|O?`usP0+rqeuDbw`n?>qB|IVIoyqiqrpU$z9LOf*lsX$EmrsGUhlf+NGy z1D}X&n>Qh2MS3a4iBm?4j z3;UocM($3TCSmCiR!)v_J|-0&@k&o0)D3Lz`=AgUNIw990SF90U;qLG5cod?fhdyv zAFA*`RR $@ + + +insts.txt: build/${mlirFileName}.mlir + aiecc.py -v --aie-only-generate-ipu --ipu-insts-name=$@ $< + +build/conv2dk1_i8.o: ../../../aie_kernels/aie2/conv2dk1_i8.cc + xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ + +build/final.xclbin: build/${mlirFileName}.mlir + cd build && aiecc.py -v --aie-generate-cdo --aie-generate-ipu --no-compile-host \ + --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + +clean: + rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log* *.xclbin sim \ + chess* *.o insts.txt \ + *.log aie_partition.json *.bin BOOT.BIN _x test.exe +run_py: + ${powershell} python3 test.py \ No newline at end of file diff --git a/programming_examples/ml/conv2d/README.md b/programming_examples/ml/conv2d/README.md new file mode 100644 index 0000000000..81b25f3e52 --- /dev/null +++ b/programming_examples/ml/conv2d/README.md @@ -0,0 +1,67 @@ + + +# Convolution 2D +## Introduction +Convolution is a crucial part of various machine learning and computer vision tasks, such as image recognition, object detection, and image segmentation. This README provides instructions for implementing convolution on AI Engine. + +At its core, it is a mathematical operation that combines an input image and a filter to produce an output image. The input data is represented as a multi-dimensional matrix, such as an image with height, width, and channels (e.g., RGB channels). The filter is also represented as a multi-dimensional matrix with filter height, width, input and output channels (the same number of channels as the input data). The filter is systematically applied to different regions of the input data. At each step, the filter is element-wise multiplied with the overlapping region of the input data. 
The element-wise products are summed up to produce a single value, which represents the result of the convolution operation for that region. This process is repeated for all possible regions of the input data, producing an output matrix called the feature map. + +The process of applying the filter to different regions of the input data is often visualized as a sliding window moving across the input data. The size of the sliding window corresponds to the size of the filter, and it moves with a certain stride (the number of pixels it moves at each step). The convolution operation consists of seven nested loops, iterating over the input height, input length, input channel, output channel, filter height, filter length, and the batch size, each loop corresponding to a different aspect of the operation. This systematic process extracts features from the input image, yielding the output feature map, illustrating the computational intricacies of convolution. + +## Acceleration Techniques +1. Kernel Optimization: To optimize convolution operations on AIE, we vectorize the code using AIE vector intrinsics. We load 8 elements of the input channel into vector registers using a vector load intrinsic. We apply the convolution operation on this loaded data for enhanced computational efficiency. To ensure accurate convolution results, particularly at the edges of feature maps, we implement zero-padding to handle boundary conditions. This comprehensive approach optimizes convolution processing on AIE, facilitating efficient and accurate feature extraction in neural network applications. Input is a 4x8 matrix corresponding to 4 elements of a row and 8 input channels. + +2. Quantization: We use int8 precision for activations and weights. At int8 precision, AIE offers the highest compute density with 256 MAC/cycle. + +3. 
Data Layout: Optimize activation and weight layout to enhance memory access patterns and enable effective utilization of AIE parallel processing units, ultimately improving the performance of 2D convolution operations. + +## Data Layout +We need to ensure that the data layout is compatible with efficient SIMD processing and rearrange the input data into a format where contiguous elements represent consecutive X-dimension values for each channel. For more efficient processing, we adopt a channels-last memory ordering, denoted as NYCXC8, to ensure that channels become the densest dimension. Operating on 8 elements simultaneously, we process 8 channels with the same width at once. Subsequently, we traverse the entire width dimension, handling the remaining channels in batches of 8. This process continues row-wise, resulting in our final data layout pattern: NYCXC8. This optimized layout enhances memory access patterns and enables effective utilization of parallel processing units, ultimately improving the performance of 2D convolution operations. This transformation ensures that data can be efficiently loaded into SIMD registers and processed in parallel. + +YCXC8 Input/Output Data Layout: + +In the YCXC8 (with N=1) data layout, the data is organized in memory as follows: + +* Y: Represents the output feature map dimension. +* C: Denotes the number of channels. +* X: Represents the input feature map dimension. +* C8: Indicates that 8 elements of the input channel are processed together. + +OIYXI8O8 Weight Layout: + +We align the weight layout as specified: O,I,Y,X,I8,O8, to match the input image processing. We first load the weight tensor, organizing it to match this layout, where dimensions represent: output channels, input channels, kernel height, kernel width, input channel groups of 8, and output channel groups of 8. 
By aligning the weight layout in this manner, we enable seamless integration with the input data layout, maximizing parallelism and minimizing memory access overhead. + +In the OIYXI8O8 data layout, the data is organized in memory as follows: + +* O: Denotes the number of output channels. +* I: Denotes the number of input channels. +* Y: Represents the kernel height. +* X: Represents the kernel width. +* I8: Indicates that 8 elements of the input channel are processed together. +* O8: Indicates that 8 elements of the output channel are processed together. + +## Compilation +To compile the design: +``` +make +``` + +To run the design: +``` +make run +``` + +### Prerequisites +To install the dependencies, run the following command: +``` +pip install -r requirements.txt + +``` \ No newline at end of file diff --git a/programming_examples/ml/conv2d/aie2.py b/programming_examples/ml/conv2d/aie2.py new file mode 100644 index 0000000000..74a2c38838 --- /dev/null +++ b/programming_examples/ml/conv2d/aie2.py @@ -0,0 +1,263 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. 
+ +import sys + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.dialects.scf import * +from aie.extras.dialects.ext import memref, arith +from aie.extras.context import mlir_mod_ctx + +width = 32 +height = 32 +in_channels = 64 +out_channels = 64 + +if len(sys.argv) == 3: + width = int(sys.argv[1]) + height = int(sys.argv[2]) + + +actIn = width * in_channels # 32*64 = 2048 +bufIn = actIn * 2 # double buffer +actInInt32s = actIn // 4 + +weights = in_channels * out_channels +weightsInInt32s = weights // 4 + +actOut = width * out_channels # 32*64 = 2048 +bufOut = actOut * 2 # double buffer +actOutInt32s = actOut // 4 + +enableTrace = False +trace_size = 16384 +traceSizeInInt32s = trace_size // 4 + + +def conv2dk1(): + with mlir_mod_ctx() as ctx: + + @device(AIEDevice.ipu) + def device_body(): + + actIn_ty = T.memref(actIn, T.i8()) + bufIn_ty = T.memref(bufIn, T.i8()) + + weights_ty = T.memref(weights, T.i8()) + + out_ty = T.memref(actOut, T.i8()) + bufOut_ty = T.memref(bufOut, T.i8()) + + # memRef_3x3_ty = T.memref(3, 3, T.i16()) + + ofifo_actIn_ty = TypeAttr.get(ObjectFifoType.get(actIn_ty)) + ofifo_bufIn_ty = TypeAttr.get(ObjectFifoType.get(bufIn_ty)) + + ofifo_weights_ty = TypeAttr.get(ObjectFifoType.get(weights_ty)) + + ofifo_out_ty = TypeAttr.get(ObjectFifoType.get(out_ty)) + ofifo_bufOut_ty = TypeAttr.get(ObjectFifoType.get(bufOut_ty)) + + # AIE Core Function declarations + conv2dk1_i8 = external_func( + "conv2dk1_i8", + inputs=[ + actIn_ty, + weights_ty, + out_ty, + T.i32(), + T.i32(), + T.i32(), + T.i32(), + ], + ) + + # Tile declarations + ShimTile = tile(0, 0) + MemTile = tile(0, 1) + ComputeTile2 = tile(0, 2) + compute_tile2_col, compute_tile2_row = 0, 2 + + if enableTrace: + flow(ComputeTile2, WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1) + + # AIE-array data movement with object fifos + # Input + of_inOF_act_L3L2 = object_fifo( + "inOF_act_L3L2", ShimTile, MemTile, 2, bufIn_ty + ) + of_act_L2_02 = object_fifo("act_L2_02", 
MemTile, ComputeTile2, 2, actIn_ty) + object_fifo_link(of_inOF_act_L3L2, of_act_L2_02) + + # wts + of_inOF_wts_0_L3L2 = object_fifo( + "inOF_wts_0_L3L2", ShimTile, [ComputeTile2], 1, weights_ty + ) + + # Output + of_out_02_L2 = object_fifo("out_02_L2", ComputeTile2, [MemTile], 2, out_ty) + of_outOFL2L3 = object_fifo("outOFL2L3", MemTile, [ShimTile], 2, bufOut_ty) + object_fifo_link(of_out_02_L2, of_outOFL2L3) + + # Set up compute tiles + + rtp2 = Buffer(ComputeTile2, [16], T.i32(), "rtp2") + + # Compute tile 2 + @core(ComputeTile2, "conv2dk1_i8.o") + def core_body(): + y_dim = 32 + x_dim = 32 + ci = 64 + co = 64 + + for _ in for_(0xFFFFFFFF): + elemWts = of_inOF_wts_0_L3L2.acquire(ObjectFifoPort.Consume, 1) + + scale = memref.load(rtp2, [0]) + # scale = memref.load(rtpComputeTile2, [0]) + + for _ in for_(y_dim): + elemIn = of_act_L2_02.acquire(ObjectFifoPort.Consume, 1) + elemOut0 = of_out_02_L2.acquire(ObjectFifoPort.Produce, 1) + + call( + conv2dk1_i8, + [ + elemIn, + elemWts, + elemOut0, + arith.constant(x_dim), + arith.constant(ci), + arith.constant(co), + scale, + ], + ) + + objectfifo_release(ObjectFifoPort.Consume, "act_L2_02", 1) + objectfifo_release(ObjectFifoPort.Produce, "out_02_L2", 1) + yield_([]) + objectfifo_release(ObjectFifoPort.Consume, "inOF_wts_0_L3L2", 1) + yield_([]) + + # To/from AIE-array data movement + + tensorSize = width * height * in_channels + tensorSizeInInt32s = tensorSize // 4 + tensor_ty = T.memref(tensorSizeInInt32s, T.i32()) + memRef_wts_ty = T.memref(weightsInInt32s, T.i32()) + # memRef_16x16_ty = T.memref(16, 16, T.i32()) + + @FuncOp.from_py_func(tensor_ty, memRef_wts_ty, tensor_ty) + def sequence(I, W, O): + if enableTrace: + # 0x340D0: Trace Control 0 + # 0xAABB---C + # AA <- Event to stop trace capture + # BB <- Event to start trace capture + # C <- Trace mode, 00=event=time, 01=event-PC, 10=execution + # Configure so that "Event 1" (always true) causes tracing to start + ipu_write32( + column=compute_tile2_col, + 
row=compute_tile2_row, + address=0x340D0, + value=0x00010000, + ) + # 0x340D4: Trace Control 1 + ipu_write32( + column=compute_tile2_col, + row=compute_tile2_row, + address=0x340D4, + value=0x00000000, + ) + # 0x340E0: Trace Event Group 1 (Which events to trace) + # 0xAABBCCDD AA, BB, CC, DD <- four event slots + ipu_write32( + column=compute_tile2_col, + row=compute_tile2_row, + address=0x340E0, + value=0x4B222125, + ) + # 0x340E4: Trace Event Group 2 (Which events to trace) + # 0xAABBCCDD AA, BB, CC, DD <- four event slots + ipu_write32( + column=compute_tile2_col, + row=compute_tile2_row, + address=0x340E4, + value=0x2D2C1A4F, + ) + + ipu_write32( + column=compute_tile2_col, + row=compute_tile2_row, + address=0x3FF00, + value=0x00000121, + ) + + # Configure a buffer descriptor to write tracing information that has been routed into this shim tile + # out to host DDR memory + trace_bd_id = 13 # use BD 13 for writing trace output from compute tile to DDR host memory + output_size = bufOut + ipu_writebd_shimtile( + bd_id=trace_bd_id, + buffer_length=trace_size, + buffer_offset=output_size, + enable_packet=0, + out_of_order_id=0, + packet_id=0, + packet_type=0, + column=0, + column_num=1, + d0_size=0, + d0_stride=0, + d1_size=0, + d1_stride=0, + d2_stride=0, + ddr_id=2, + iteration_current=0, + iteration_size=0, + iteration_stride=0, + lock_acq_enable=0, + lock_acq_id=0, + lock_acq_val=0, + lock_rel_id=0, + lock_rel_val=0, + next_bd=0, + use_next_bd=0, + valid_bd=1, + ) + # Set start BD to our shim bd_Id (3) + ipu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id) + + IpuWriteRTPOp("rtp2", col=0, row=2, index=0, value=10) + + ipu_dma_memcpy_nd( + metadata="inOF_act_L3L2", + bd_id=0, + mem=I, + sizes=[1, 1, 1, tensorSizeInInt32s], + ) + ipu_dma_memcpy_nd( + metadata="outOFL2L3", + bd_id=2, + mem=O, + sizes=[1, 1, 1, tensorSizeInInt32s], + ) + ipu_dma_memcpy_nd( + metadata="inOF_wts_0_L3L2", + bd_id=2, + mem=W, + sizes=[1, 1, 1, weightsInInt32s], + ) + 
ipu_sync(column=0, row=0, direction=0, channel=0) + + # print(ctx.module.operation.verify()) + print(ctx.module) + + +conv2dk1() diff --git a/programming_examples/ml/conv2d/requirements.txt b/programming_examples/ml/conv2d/requirements.txt new file mode 100644 index 0000000000..08ed5eeb4b --- /dev/null +++ b/programming_examples/ml/conv2d/requirements.txt @@ -0,0 +1 @@ +torch \ No newline at end of file diff --git a/programming_examples/ml/conv2d/run.lit b/programming_examples/ml/conv2d/run.lit new file mode 100644 index 0000000000..1eeef90b94 --- /dev/null +++ b/programming_examples/ml/conv2d/run.lit @@ -0,0 +1,10 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, chess, torch +// +// RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DINT8_ACT -c %S/../../../aie_kernels/aie2/conv2dk1_i8.cc -o conv2dk1_i8.o +// RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %run_on_ipu %python %S/test.py | FileCheck %s +// CHECK: PASS! \ No newline at end of file diff --git a/programming_examples/ml/conv2d/test.py b/programming_examples/ml/conv2d/test.py new file mode 100644 index 0000000000..1dc847d8fe --- /dev/null +++ b/programming_examples/ml/conv2d/test.py @@ -0,0 +1,149 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. 
+ +import torch +import torch.nn as nn +import sys +import math +from aie.utils.ml import DataShaper +import time +import os +import numpy as np +from aie.utils.xrt import setup_aie, extract_trace, write_out_trace, execute + +torch.use_deterministic_algorithms(True) +torch.manual_seed(0) + +design = "conv2d" +xclbin_path = os.path.abspath("build/final.xclbin") +insts_path = os.path.abspath("build/insts.txt") + +log_folder = "log/" +if not os.path.exists(log_folder): + os.makedirs(log_folder) + +num_iter = 1 +npu_time_total = 0 +npu_time_min = 9999999 +npu_time_max = 0 +trace_size = 16384 +enable_trace = False +trace_file = "log/trace_" + design + ".txt" +# ------------------------------------------------------ +# Configure this to match your design's buffer size +# ------------------------------------------------------ +dtype_in = np.dtype("int8") +dtype_wts = np.dtype("int8") +dtype_out = np.dtype("int8") + +shape_total_wts = (4096, 1) +shape_in_act = (32, 8, 32, 8) #'YCXC8' , 'CYX' +shape_in_wts1 = (8, 8, 1, 1, 8, 8) # out,in,ky,kx,in8,out8 +shape_out = (32, 8, 32, 8) + +# ------------------------------------------------------ +# Initialize activation, weights, scaling factor for int8 model +# ------------------------------------------------------ +int_inp = torch.randint(1, 20, (1, 64, 32, 32)).type(torch.FloatTensor) +int_weight = torch.randint(50, 80, (64, 64, 1, 1)).type(torch.FloatTensor) +conv_scale = 7.6294e-06 # scale to convert int8 output to floating point +int8_scale = 0.0078 # scale to convert int8 output to floating point +min = -128 +max = 127 +# ------------------------------------------------------ +# Get device, load the xclbin & kernel and register them +# ------------------------------------------------------ +app = setup_aie( + xclbin_path, + insts_path, + shape_in_act, + dtype_in, + shape_total_wts, + dtype_wts, + shape_out, + dtype_out, + enable_trace=enable_trace, + trace_size=trace_size, +) + + +# 
------------------------------------------------------ +# Define your golden reference +# ------------------------------------------------------ +class conv2d_int_model(nn.Module): + def __init__(self, in_planes=64, planes=64): + super(conv2d_int_model, self).__init__() + self.conv = nn.Conv2d(64, 64, kernel_size=1, bias=False) + + def forward(self, x): + out_int = self.conv(x) + out_quant = out_int * conv_scale # int8 x int8 leads to int32 output + out_float = int8_scale * torch.clamp( + torch.round(out_quant / int8_scale), min, max + ) # converting to int8 range + return out_float + + +# ------------------------------------------------------ +# Pytorch baseline +# ------------------------------------------------------ +model = conv2d_int_model() +model.eval() +model.conv.weight.data.copy_(int_weight) + +golden_output = model(int_inp) + +# ------------------------------------------------------ +# Reorder input data-layout +# ------------------------------------------------------ +ds = DataShaper() +before_input = int_inp.squeeze().data.numpy().astype(dtype_in) +before_input.tofile(log_folder + "/before_ifm_mem_fmt_1x1.txt", sep=",", format="%d") +ifm_mem_fmt = ds.reorder_mat(before_input, "YCXC8", "CYX") +ifm_mem_fmt.tofile(log_folder + "/after_ifm_mem_fmt_1x1.txt", sep=",", format="%d") + +wts1 = ds.reorder_mat(int_weight.data.numpy().astype(dtype_wts), "OIYXI8O8", "OIYX") +total_wts = np.concatenate((wts1), axis=None) +total_wts.tofile(log_folder + "/weights_mem_fmt_final.txt", sep=",", format="%d") + +# ------------------------------------------------------ +# Main run loop +# ------------------------------------------------------ +for i in range(num_iter): + start = time.time_ns() + aie_output = execute(app, ifm_mem_fmt, total_wts) * int8_scale + stop = time.time_ns() + + if enable_trace: + aie_output, trace = extract_trace(aie_output, shape_out, dtype_out, trace_size) + write_out_trace(trace, trace_file) + + npu_time = stop - start + npu_time_total = 
npu_time_total + npu_time + +# ------------------------------------------------------ +# Reorder output data-layout +# ------------------------------------------------------ +temp_out = aie_output.reshape(32, 8, 32, 8) +temp_out = ds.reorder_mat(temp_out, "CDYX", "YCXD") +ofm_mem_fmt = temp_out.reshape(64, 32, 32) +ofm_mem_fmt.tofile(log_folder + "/after_ofm_mem_fmt_final.txt", sep=",", format="%d") +ofm_mem_fmt_out = torch.from_numpy(ofm_mem_fmt).unsqueeze(0) + +# ------------------------------------------------------ +# Compare the AIE output and the golden reference +# ------------------------------------------------------ + +print("\nAvg NPU time: {}us.".format(int((npu_time_total / num_iter) / 1000))) + +assert np.allclose( + ofm_mem_fmt_out.detach().numpy(), + golden_output.detach().numpy(), + rtol=0, + atol=2 * int8_scale, +) +print("\nPASS!\n") diff --git a/programming_examples/ml/conv2d_fused_relu/CMakeLists.txt b/programming_examples/ml/conv2d_fused_relu/CMakeLists.txt new file mode 100644 index 0000000000..4b897cb29c --- /dev/null +++ b/programming_examples/ml/conv2d_fused_relu/CMakeLists.txt @@ -0,0 +1,89 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. 
+ +# parameters +# -DBOOST_ROOT: Path to Boost install +# -DOpenCV_DIR: Path to OpenCV install +# -DXRT_INC_DIR: Full path to src/runtime_src/core/include in XRT cloned repo +# -DXRT_LIB_DIR: Path to xrt_coreutil.lib +# -DTARGET_NAME: Target name to be built + +# cmake needs this line +cmake_minimum_required(VERSION 3.1) + +find_program(WSL NAMES powershell.exe) + +if (NOT WSL) + set(BOOST_ROOT /usr/include/boost CACHE STRING "Path to Boost install") + set(OpenCV_DIR /usr/include/opencv4 CACHE STRING "Path to OpenCV install") + set(XRT_INC_DIR /opt/xilinx/xrt/include CACHE STRING "Path to XRT cloned repo") + set(XRT_LIB_DIR /opt/xilinx/xrt/lib CACHE STRING "Path to xrt_coreutil.lib") +else() + set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") + set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install") + set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") + set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") +endif () + +set(EDGEDETECT_WIDTH 1920 CACHE STRING "image width") +set(EDGEDETECT_HEIGHT 1080 CACHE STRING "image height") + +set(TARGET_NAME test CACHE STRING "Target to be built") + +SET (ProjectName ${TARGET_NAME}) +SET (currentTarget ${TARGET_NAME}) + +if ( WSL ) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}) +endif () + +project(${ProjectName}) + +# Find packages +find_package(Boost REQUIRED) +find_package(OpenCV REQUIRED) +message("opencv library paht: ${OpenCV_LIB_PATH}") +message("opencv libs: ${OpenCV_LIBS}") + + +add_executable(${currentTarget} + ${CMAKE_CURRENT_SOURCE_DIR}/../../../utils/OpenCVUtils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../../utils/xrtUtils.cpp + test.cpp +) + +target_compile_definitions(${currentTarget} PUBLIC + EDGEDETECT_WIDTH=${EDGEDETECT_WIDTH} + EDGEDETECT_HEIGHT=${EDGEDETECT_HEIGHT} + DISABLE_ABI_CHECK=1 + ) + +target_include_directories (${currentTarget} 
PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/../../../utils + ${XRT_INC_DIR} + ${OpenCV_INCLUDE_DIRS} + ${Boost_INCLUDE_DIRS} +) + +target_link_directories(${currentTarget} PUBLIC + ${XRT_LIB_DIR} + ${OpenCV_LIB_PATH} + ${Boost_LIBRARY_DIRS} +) + +if (NOT WSL) + target_link_libraries(${currentTarget} PUBLIC + xrt_coreutil + ${OpenCV_LIBS} + boost_program_options + boost_filesystem + ) +else() + target_link_libraries(${currentTarget} PUBLIC + xrt_coreutil + ${OpenCV_LIBS} + ) +endif() diff --git a/programming_examples/ml/conv2d_fused_relu/Makefile b/programming_examples/ml/conv2d_fused_relu/Makefile new file mode 100755 index 0000000000..80cb34dc08 --- /dev/null +++ b/programming_examples/ml/conv2d_fused_relu/Makefile @@ -0,0 +1,35 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. + +include ../../makefile-common + +mlirFileName = aieWithTrace_1core + +all: build/conv2dk1.o build/final.xclbin + +build/${mlirFileName}.mlir: aie2.py + mkdir -p ${@D} + python3 $< > $@ + + +insts.txt: build/${mlirFileName}.mlir + aiecc.py -v --aie-only-generate-ipu --ipu-insts-name=$@ $< + +build/conv2dk1.o: ../../../aie_kernels/aie2/conv2dk1.cc + xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ + +build/final.xclbin: build/${mlirFileName}.mlir + cd build && aiecc.py -v --aie-generate-cdo --aie-generate-ipu --no-compile-host \ + --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + +clean: + rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log* *.xclbin sim \ + chess* *.o insts.txt \ + *.log aie_partition.json *.bin BOOT.BIN _x test.exe + +run_py: + ${powershell} python3 test.py diff --git a/programming_examples/ml/conv2d_fused_relu/README.md b/programming_examples/ml/conv2d_fused_relu/README.md new file mode 100644 index 0000000000..68e7e9b8cf --- /dev/null +++ 
b/programming_examples/ml/conv2d_fused_relu/README.md @@ -0,0 +1,99 @@ + + +# Convolution with Fused ReLU + +## Introduction +Convolution is a crucial part of various machine learning and computer vision tasks, such as image recognition, object detection, and image segmentation. ReLU (Rectified Linear Unit) is one of the most commonly used activation functions due to its simplicity and effectiveness. This README provides instructions for implementing convolution with ReLU activation function on AI Engine. + + +At its core, convolution is a mathematical operation that combines an input image and a filter to produce an output image. The input data is represented as a multi-dimensional matrix, such as an image with height, width, and channels (e.g., RGB channels). The filter is also represented as a multi-dimensional matrix with filter height, width, input and output channels (the same number of channels as the input data). The filter is systematically applied to different regions of the input data. At each step, the filter is element-wise multiplied with the overlapping region of the input data. The element-wise products are summed up to produce a single value, which represents the result of the convolution operation for that region. This process is repeated for all possible regions of the input data, producing an output matrix called the feature map. + +The process of applying the filter to different regions of the input data is often visualized as a sliding window moving across the input data. The size of the sliding window corresponds to the size of the filter, and it moves with a certain stride (the number of pixels it moves at each step). The convolution operation consists of seven nested loops, iterating over the input height, input length, input channel, output channel, filter height, filter length, and the batch size, each loop corresponding to a different aspect of the operation.
This systematic process extracts features from the input image, yielding the output feature map, illustrating the computational intricacies of convolution. + +## Acceleration Techniques +1. Kernel Optimization: To optimize convolution operations on AIE, we vectorize the code using AIE vector intrinsics. We load 8 elements of the input channel into vector registers using vector load intrinsic. We apply the convolution operation on this loaded data, utilizing it for enhanced computational efficiency. To ensure accurate convolution results, particularly at the edges of feature maps, we implement zero-padding to handle boundary conditions. This comprehensive approach optimizes convolution processing on AIE, facilitating efficient and accurate feature extraction in neural network applications. Input is a 4x8 matrix corresponding to 4 elements of a row and 8 input channels. + +2. Quantization: We use int8 precision for activations and weights. At int8 precision, AIE offers the highest compute density with 256 MAC/cycle. + +3. Layer Fusion: We perform two levels of fusion. First, we fuse ReLU in convolution using SRS capabilities of AIE. Second, we fuse BatchNorm into convolution weights. + +4. Data Layout: Optimize activation and weight layout to enhance memory access patterns and enable effective utilization of AIE parallel processing units, ultimately improving the performance of 2D convolution operations. + +## Data Layout +We need to ensure that the data layout is compatible with efficient SIMD processing and rearrange the input data into a format where contiguous elements represent consecutive X-dimension values for each channel. For more efficient processing, we adopt a channels-last memory ordering, denoted as NYCXC8, to ensure that channels become the densest dimension. Operating on 8 elements simultaneously, we process 8 channels with the same width at once. Subsequently, we traverse the entire width dimension, handling the remaining channels in batches of 8.
This process continues row-wise, resulting in our final data layout pattern: NYCXC8. This optimized layout enhances memory access patterns and enables effective utilization of parallel processing units, ultimately improving the performance of 2D convolution operations. This transformation ensures that data can be efficiently loaded into SIMD registers and processed in parallel. + +YCXC8 Input/Output Data Layout: + +In the YCXC8 (with N=1) data layout, the data is organized in memory as follows: + +* Y: Represents the output feature map dimension. +* C: Denotes the number of channels. +* X: Represents the input feature map dimension. +* C8: Indicates that 8 elements of the input channel are processed together. + +OIYXI8O8 Weight Layout: + +We align the weight layout as specified: O,I,Y,X,I8,O8, to match the input image processing. We first load the weight tensor, organizing it to match this layout, where dimensions represent: output channels, input channels, kernel height, kernel width, input channel groups of 8, and output channel groups of 8. By aligning the weight layout in this manner, we enable seamless integration with the input data layout, maximizing parallelism and minimizing memory access overhead. + +In the OIYXI8O8 data layout, the data is organized in memory as follows: + +* O: Denotes the number of output channels. +* I: Denotes the number of input channels. +* Y: Represents the kernel height. +* X: Represents the kernel width. +* I8: Indicates that 8 elements of the input channel are processed together. +* O8: Indicates that 8 elements of the output channel are processed together. + + +## Fusing ReLU +Fusing ReLU into the convolution operation can further optimize the implementation by reducing memory bandwidth requirements and computational overhead. ReLU activation function introduces non-linearity by setting negative values to zero and leaving positive values unchanged.
Utilize SIMD instructions to efficiently compute ReLU activation in parallel with convolution. After performing the convolution operation, apply ReLU activation function at vector register level. +We use `aie::set_rounding()` and `aie::set_saturation()` to set the rounding and saturation modes for the computed results in the accumulator. Setting round mode `positive_inf` rounds halfway towards positive infinity, while setting saturation to `aie::saturation_mode::saturate` clamps results to a uint8 range (0, 255). + +``` +::aie::set_saturation( + aie::saturation_mode::saturate); // Needed to saturate properly to uint8 +::aie::set_rounding( + aie::rounding_mode::positive_inf); // Needed to saturate properly to uint8 +``` +After convolution and ReLU fusion, the output data is generated in YCXC8 layout. Ensure that the output data layout is compatible with subsequent layers or processing steps in the neural network architecture. + + +### Benefits of ReLU Fusion: + +1. Reduced Memory Bandwidth: +By fusing ReLU into the convolution operation, unnecessary memory accesses and data transfers associated with separate ReLU computation are eliminated, leading to reduced memory bandwidth requirements. + +2. Improved Performance: +Fusing ReLU reduces the number of instructions executed per element, resulting in improved computational efficiency and overall performance of the convolution operation. + +3. Simplified Code Structure: +Fusing ReLU into the convolution kernel simplifies the code structure and reduces the overhead associated with separate activation function calls, leading to cleaner and more maintainable code. + +4. Enhanced Resource Utilization: +By combining convolution and ReLU operations, computational resources such as CPU cores or SIMD units are utilized more efficiently, maximizing throughput and achieving better resource utilization.
+ +## Compilation +To compile the design: +``` +make +``` + +To run the design: +``` +make run +``` + +### Prerequisites +To install the dependencies, run the following command: +``` +pip install -r requirements.txt + +``` \ No newline at end of file diff --git a/programming_examples/ml/conv2d_fused_relu/aie2.py b/programming_examples/ml/conv2d_fused_relu/aie2.py new file mode 100644 index 0000000000..be0167e3b4 --- /dev/null +++ b/programming_examples/ml/conv2d_fused_relu/aie2.py @@ -0,0 +1,263 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. + +import sys + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.dialects.scf import * +from aie.extras.dialects.ext import memref, arith +from aie.extras.context import mlir_mod_ctx + +width = 32 +height = 32 +in_channels = 64 +out_channels = 64 + +if len(sys.argv) == 3: + width = int(sys.argv[1]) + height = int(sys.argv[2]) + + +actIn = width * in_channels # 32*64 = 2048 +bufIn = actIn * 2 # double buffer +actInInt32s = actIn // 4 + +weights = in_channels * out_channels +weightsInInt32s = weights // 4 + +actOut = width * out_channels # 32*64 = 2048 +bufOut = actOut * 2 # double buffer +actOutInt32s = actOut // 4 + +enableTrace = False +trace_size = 16384 +traceSizeInInt32s = trace_size // 4 + + +def conv2dk1(): + with mlir_mod_ctx() as ctx: + + @device(AIEDevice.ipu) + def device_body(): + + actIn_ty = T.memref(actIn, T.i8()) + bufIn_ty = T.memref(bufIn, T.i8()) + + weights_ty = T.memref(weights, T.i8()) + + out_ty = T.memref(actOut, T.ui8()) + bufOut_ty = T.memref(bufOut, T.ui8()) + + # memRef_3x3_ty = T.memref(3, 3, T.i16()) + + ofifo_actIn_ty = TypeAttr.get(ObjectFifoType.get(actIn_ty)) + ofifo_bufIn_ty = TypeAttr.get(ObjectFifoType.get(bufIn_ty)) + + ofifo_weights_ty = 
TypeAttr.get(ObjectFifoType.get(weights_ty)) + + ofifo_out_ty = TypeAttr.get(ObjectFifoType.get(out_ty)) + ofifo_bufOut_ty = TypeAttr.get(ObjectFifoType.get(bufOut_ty)) + + # AIE Core Function declarations + conv2dk1_i8 = external_func( + "conv2dk1_i8", + inputs=[ + actIn_ty, + weights_ty, + out_ty, + T.i32(), + T.i32(), + T.i32(), + T.i32(), + ], + ) + + # Tile declarations + ShimTile = tile(0, 0) + MemTile = tile(0, 1) + ComputeTile2 = tile(0, 2) + compute_tile2_col, compute_tile2_row = 0, 2 + + if enableTrace: + flow(ComputeTile2, WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1) + + # AIE-array data movement with object fifos + # Input + of_inOF_act_L3L2 = object_fifo( + "inOF_act_L3L2", ShimTile, MemTile, 2, bufIn_ty + ) + of_act_L2_02 = object_fifo("act_L2_02", MemTile, ComputeTile2, 2, actIn_ty) + object_fifo_link(of_inOF_act_L3L2, of_act_L2_02) + + # wts + of_inOF_wts_0_L3L2 = object_fifo( + "inOF_wts_0_L3L2", ShimTile, [ComputeTile2], 1, weights_ty + ) + + # Output + of_out_02_L2 = object_fifo("out_02_L2", ComputeTile2, [MemTile], 2, out_ty) + of_outOFL2L3 = object_fifo("outOFL2L3", MemTile, [ShimTile], 2, bufOut_ty) + object_fifo_link(of_out_02_L2, of_outOFL2L3) + + # Set up compute tiles + + rtp2 = Buffer(ComputeTile2, [16], T.i32(), "rtp2") + + # Compute tile 2 + @core(ComputeTile2, "conv2dk1.o") + def core_body(): + y_dim = 32 + x_dim = 32 + ci = 64 + co = 64 + + for _ in for_(0xFFFFFFFF): + elemWts = of_inOF_wts_0_L3L2.acquire(ObjectFifoPort.Consume, 1) + + scale = memref.load(rtp2, [0]) + # scale = memref.load(rtpComputeTile2, [0]) + + for _ in for_(y_dim): + elemIn = of_act_L2_02.acquire(ObjectFifoPort.Consume, 1) + elemOut0 = of_out_02_L2.acquire(ObjectFifoPort.Produce, 1) + + call( + conv2dk1_i8, + [ + elemIn, + elemWts, + elemOut0, + arith.constant(x_dim), + arith.constant(ci), + arith.constant(co), + scale, + ], + ) + + objectfifo_release(ObjectFifoPort.Consume, "act_L2_02", 1) + objectfifo_release(ObjectFifoPort.Produce, "out_02_L2", 1) + 
yield_([]) + objectfifo_release(ObjectFifoPort.Consume, "inOF_wts_0_L3L2", 1) + yield_([]) + + # To/from AIE-array data movement + + tensorSize = width * height * in_channels + tensorSizeInInt32s = tensorSize // 4 + tensor_ty = T.memref(tensorSizeInInt32s, T.i32()) + memRef_wts_ty = T.memref(weightsInInt32s, T.i32()) + # memRef_16x16_ty = T.memref(16, 16, T.i32()) + + @FuncOp.from_py_func(tensor_ty, memRef_wts_ty, tensor_ty) + def sequence(I, W, O): + if enableTrace: + # 0x340D0: Trace Control 0 + # 0xAABB---C + # AA <- Event to stop trace capture + # BB <- Event to start trace capture + # C <- Trace mode, 00=event=time, 01=event-PC, 10=execution + # Configure so that "Event 1" (always true) causes tracing to start + ipu_write32( + column=compute_tile2_col, + row=compute_tile2_row, + address=0x340D0, + value=0x00010000, + ) + # 0x340D4: Trace Control 1 + ipu_write32( + column=compute_tile2_col, + row=compute_tile2_row, + address=0x340D4, + value=0x00000000, + ) + # 0x340E0: Trace Event Group 1 (Which events to trace) + # 0xAABBCCDD AA, BB, CC, DD <- four event slots + ipu_write32( + column=compute_tile2_col, + row=compute_tile2_row, + address=0x340E0, + value=0x4B222125, + ) + # 0x340E4: Trace Event Group 2 (Which events to trace) + # 0xAABBCCDD AA, BB, CC, DD <- four event slots + ipu_write32( + column=compute_tile2_col, + row=compute_tile2_row, + address=0x340E4, + value=0x2D2C1A4F, + ) + + ipu_write32( + column=compute_tile2_col, + row=compute_tile2_row, + address=0x3FF00, + value=0x00000121, + ) + + # Configure a buffer descriptor to write tracing information that has been routed into this shim tile + # out to host DDR memory + trace_bd_id = 13 # use BD 13 for writing trace output from compute tile to DDR host memory + output_size = bufOut + ipu_writebd_shimtile( + bd_id=trace_bd_id, + buffer_length=trace_size, + buffer_offset=output_size, + enable_packet=0, + out_of_order_id=0, + packet_id=0, + packet_type=0, + column=0, + column_num=1, + d0_size=0, + 
d0_stride=0, + d1_size=0, + d1_stride=0, + d2_stride=0, + ddr_id=2, + iteration_current=0, + iteration_size=0, + iteration_stride=0, + lock_acq_enable=0, + lock_acq_id=0, + lock_acq_val=0, + lock_rel_id=0, + lock_rel_val=0, + next_bd=0, + use_next_bd=0, + valid_bd=1, + ) + # Set start BD to our shim bd_Id (3) + ipu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id) + + IpuWriteRTPOp("rtp2", col=0, row=2, index=0, value=1) + + ipu_dma_memcpy_nd( + metadata="inOF_act_L3L2", + bd_id=0, + mem=I, + sizes=[1, 1, 1, tensorSizeInInt32s], + ) + ipu_dma_memcpy_nd( + metadata="outOFL2L3", + bd_id=2, + mem=O, + sizes=[1, 1, 1, tensorSizeInInt32s], + ) + ipu_dma_memcpy_nd( + metadata="inOF_wts_0_L3L2", + bd_id=2, + mem=W, + sizes=[1, 1, 1, weightsInInt32s], + ) + ipu_sync(column=0, row=0, direction=0, channel=0) + + # print(ctx.module.operation.verify()) + print(ctx.module) + + +conv2dk1() diff --git a/programming_examples/ml/conv2d_fused_relu/requirements.txt b/programming_examples/ml/conv2d_fused_relu/requirements.txt new file mode 100644 index 0000000000..08ed5eeb4b --- /dev/null +++ b/programming_examples/ml/conv2d_fused_relu/requirements.txt @@ -0,0 +1 @@ +torch \ No newline at end of file diff --git a/programming_examples/ml/conv2d_fused_relu/run.lit b/programming_examples/ml/conv2d_fused_relu/run.lit new file mode 100644 index 0000000000..0c122f451e --- /dev/null +++ b/programming_examples/ml/conv2d_fused_relu/run.lit @@ -0,0 +1,10 @@ +// (c) Copyright 2023 Advanced Micro Devices, Inc. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, chess, torch +// +// RUN: xchesscc_wrapper aie2 -I %aietools/include -DINT8_ACT -DBIT_WIDTH=8 -c %S/../../../aie_kernels/aie2/conv2dk1.cc -o conv2dk1.o +// RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %run_on_ipu %python %S/test.py | FileCheck %s +// CHECK: PASS! \ No newline at end of file diff --git a/programming_examples/ml/conv2d_fused_relu/test.py b/programming_examples/ml/conv2d_fused_relu/test.py new file mode 100644 index 0000000000..5bfe139112 --- /dev/null +++ b/programming_examples/ml/conv2d_fused_relu/test.py @@ -0,0 +1,151 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. 
+ +import torch +import torch.nn as nn +import sys +import math +from aie.utils.ml import DataShaper +import time +import os +import numpy as np +from aie.utils.xrt import setup_aie, extract_trace, write_out_trace, execute + +torch.use_deterministic_algorithms(True) +torch.manual_seed(0) + +design = "conv2d_with_relu" +xclbin_path = os.path.abspath("build/final.xclbin") +insts_path = os.path.abspath("build/insts.txt") + +log_folder = "log/" +if not os.path.exists(log_folder): + os.makedirs(log_folder) + +num_iter = 1 +npu_time_total = 0 +npu_time_min = 9999999 +npu_time_max = 0 +trace_size = 16384 +enable_trace = False +trace_file = "log/trace_" + design + ".txt" +# ------------------------------------------------------ +# Configure this to match your design's buffer size +# ------------------------------------------------------ +dtype_in = np.dtype("int8") +dtype_wts = np.dtype("int8") +dtype_out = np.dtype("uint8") + +shape_total_wts = (4096, 1) +shape_in_act = (32, 8, 32, 8) #'YCXC8' , 'CYX' +shape_in_wts1 = (8, 8, 1, 1, 8, 8) # out,in,ky,kx,in8,out8 +shape_out = (32, 8, 32, 8) + +# ------------------------------------------------------ +# Initialize activation, weights, scaling factor for int8 model +# ------------------------------------------------------ +int_inp = torch.randint(1, 100, (1, 64, 32, 32)).type(torch.FloatTensor) +int_weight = torch.randint(50, 100, (64, 64, 1, 1)).type(torch.FloatTensor) +conv_scale = 0.0039 # scale to convert int8 output to floating point +relu_scale = 0.0078 # scale to convert int8 output to floating point +min = 0 +max = 255 + +# ------------------------------------------------------ +# Get device, load the xclbin & kernel and register them +# ------------------------------------------------------ +app = setup_aie( + xclbin_path, + insts_path, + shape_in_act, + dtype_in, + shape_total_wts, + dtype_wts, + shape_out, + dtype_out, + enable_trace=enable_trace, + trace_size=trace_size, +) + + +# 
------------------------------------------------------ +# Define your golden reference +# ------------------------------------------------------ +class conv2d_relu_int_model(nn.Module): + def __init__(self, in_planes=64, planes=64): + super(conv2d_relu_int_model, self).__init__() + self.conv = nn.Conv2d(64, 64, kernel_size=1, bias=False) + self.relu = nn.ReLU() + + def forward(self, x): + out_int = self.conv(x) + out_float = out_int * conv_scale + out_int = self.relu(out_float) + out_float = relu_scale * torch.clamp( + torch.round(out_int / relu_scale), min, max + ) # converting to int to do proper clipping + return out_float + + +# ------------------------------------------------------ +# Pytorch baseline +# ------------------------------------------------------ +model = conv2d_relu_int_model() +model.eval() +model.conv.weight.data.copy_(int_weight) +golden_output = model(int_inp) + +# ------------------------------------------------------ +# Reorder input data-layout +# ------------------------------------------------------ +ds = DataShaper() +before_input = int_inp.squeeze().data.numpy().astype(dtype_in) +before_input.tofile(log_folder + "/before_ifm_mem_fmt_1x1.txt", sep=",", format="%d") +ifm_mem_fmt = ds.reorder_mat(before_input, "YCXC8", "CYX") +ifm_mem_fmt.tofile(log_folder + "/after_ifm_mem_fmt_1x1.txt", sep=",", format="%d") + +wts1 = ds.reorder_mat(int_weight.data.numpy().astype(dtype_wts), "OIYXI8O8", "OIYX") +total_wts = np.concatenate((wts1), axis=None) +total_wts.tofile(log_folder + "/weights_mem_fmt_final.txt", sep=",", format="%d") + +# ------------------------------------------------------ +# Main run loop +# ------------------------------------------------------ +for i in range(num_iter): + start = time.time_ns() + aie_output = execute(app, ifm_mem_fmt, total_wts) * relu_scale + stop = time.time_ns() + + if enable_trace: + aie_output, trace = extract_trace(aie_output, shape_out, dtype_out, trace_size) + write_out_trace(trace, trace_file) + + 
npu_time = stop - start + npu_time_total = npu_time_total + npu_time + +# ------------------------------------------------------ +# Reorder output data-layout +# ------------------------------------------------------ +temp_out = aie_output.reshape(32, 8, 32, 8) +temp_out = ds.reorder_mat(temp_out, "CDYX", "YCXD") +ofm_mem_fmt = temp_out.reshape(64, 32, 32) +ofm_mem_fmt.tofile(log_folder + "/after_ofm_mem_fmt_final.txt", sep=",", format="%d") +ofm_mem_fmt_out = torch.from_numpy(ofm_mem_fmt).unsqueeze(0) + +# ------------------------------------------------------ +# Compare the AIE output and the golden reference +# ------------------------------------------------------ +print("\nAvg NPU time: {}us.".format(int((npu_time_total / num_iter) / 1000))) + +assert np.allclose( + ofm_mem_fmt_out.detach().numpy(), + golden_output.detach().numpy(), + rtol=0, + atol=2 * relu_scale, +) + +print("\nPASS!\n") diff --git a/programming_examples/ml/resnet/README.md b/programming_examples/ml/resnet/README.md new file mode 100755 index 0000000000..6382079c62 --- /dev/null +++ b/programming_examples/ml/resnet/README.md @@ -0,0 +1,121 @@ + + +# ResNet with Offloaded Conv2_x Bottleneck Blocks + +## Introduction +ResNet [[1]](#1) is a convolutional neural network architecture that has gained significant popularity for various computer vision tasks, including image classification, object detection, and image segmentation. It is renowned for its depth and efficiency in training very deep networks. + +This README focuses on a specific optimization technique applied to ResNet, specifically targeting the offloading of the conv2_x part of the bottleneck blocks. By offloading computations to dedicated hardware accelerators or specialized processors, we aim to improve the overall efficiency and speed of the network, especially when deploying it on resource-constrained devices or in scenarios where real-time processing is critical. 
+ + +## ResNet Architecture Overview +ResNet consists of several key components: + +1. Input Layer: Accepts input image data with dimensions typically set to 224x224x3 (width, height, RGB channels). +2. Convolutional Layers: The initial layers perform convolution operations to extract basic features from the input image. +3. Bottleneck Blocks: + * ResNet is composed of multiple bottleneck blocks grouped into different stages (conv2_x, conv3_x, conv4_x, conv5_x). + * Each bottleneck block contains convolutional layers and shortcut connections that facilitate the learning of residual mappings. + * The conv2_x stage is particularly targeted for offloading computations in this optimization. +4. Pooling Layers: Max pooling layers reduce the spatial dimensions of the feature maps. +5. Fully Connected Layer: Produces the final output predictions, typically followed by a softmax activation for classification tasks. + + +## Offloading Conv2_x Bottleneck Blocks +The conv2_x stage of ResNet comprises a series of bottleneck blocks, each containing convolutional layers responsible for learning more complex features from the input data. By offloading the computations within these blocks to AI Engine, we aim to: + +* Reduce the computational burden on the main processing unit (e.g., CPU or GPU). +* Improve overall inference speed and efficiency, especially in scenarios where real-time processing is crucial. +* Enable deployment on resource-constrained devices with limited computational resources. + +## Usage and Deployment +To leverage the optimized ResNet with offloaded conv2_x bottleneck blocks: +* [IRON Programming](https://github.com/Xilinx/mlir-aie/tree/gagan_asplos_resnet/programming_examples/ml/resnet/layers_conv2_x): Demonstrates the IRON flow for offloading conv2_x to AIE. + + +## Acceleration Techniques +1. Depth-First/Layer-Fused Implementation: Spatial architectures provide coarse-grained flexibility that allows for tailoring of the dataflow to optimize data movement. 
By tailoring the dataflow, we implement a depth-first schedule for a bottleneck block routing the output of one convolutional operation on an AIE core directly to another convolutional operation on a separate AIE core, all without the need to transfer intermediate results off-chip. This approach effectively minimizes the memory footprint associated with intermediate data, mitigating the overhead of costly off-chip accesses leading to an increase in the overall performance. + + +2. Data Layout: Optimize activation and weight layout to enhance memory access patterns and enable effective utilization of AIE parallel processing units, ultimately improving the performance of 2D convolution operations. + +3. Kernel Optimization: To optimize convolution operations on AIE, we vectorize the code using AIE vector intrinsics. We load 8 elements of the input channel into vector registers using vector load intrinsic. We apply the convolution operation on this loaded data, utilizing it for enhanced computational efficiency. To ensure accurate convolution results, particularly at the edges of feature maps, we implement zero-padding to handle boundary conditions. This comprehensive approach optimizes convolution processing on AIE, facilitating efficient and accurate feature extraction in neural network applications. Input is a 4x8 matrix corresponding to 4 elements of a row and 8 input channels. + +4. Quantization: We use int8 precision for activations and weights. At int8 precision, AIE offers the highest compute density with 256 MAC/cycle. + +5. Layer Fusion: We perform two levels of fusion. First, we fuse ReLU in convolution using SRS capabilities of AIE. Second, we fuse BatchNorm into convolution weights. + + +## Data Layout +We need to ensure that the data layout is compatible with efficient SIMD processing and rearrange the input data into a format where contiguous elements represent consecutive X-dimension values for each channel.
For more efficient processing, we adopt a channels-last memory ordering, denoted as NYCXC8, to ensure that channels become the densest dimension. Operating on 8 elements simultaneously, we process 8 channels with the same width at once. Subsequently, we traverse the entire width dimension, handling the remaining channels in batches of 8. This process continues row-wise, resulting in our final data layout pattern: NYCXC8. This optimized layout enhances memory access patterns and enables effective utilization of parallel processing units, ultimately improving the performance of 2D convolution operations. This transformation ensures that data can be efficiently loaded into SIMD registers and processed in parallel. + +YCXC8 Input/Output Data Layout: + +In the YCXC8 (with N=1) data layout, the data is organized in memory as follows: + +* Y: Represents the output feature map dimension. +* C: Denotes the number of channels. +* X: Represents the input feature map dimension. +* C8: Indicates that 8 elements of the input channel are processed together. + +OIYXI8O8 Weight Layout: + +We align the weight layout as specified: O,I,Y,X,I8,O8, to match the input image processing. We first load the weight tensor, organizing it to match this layout, where dimensions represent: output channels, input channels, kernel height, kernel width, input channel groups of 8, and output channel groups of 8. By aligning the weight layout in this manner, we enable seamless integration with the input data layout, maximizing parallelism and minimizing memory access overhead. + +In the OIYXI8O8 data layout, the data is organized in memory as follows: + +* O: Denotes the number of output channels. +* I: Denotes the number of input channels. +* Y: Represents the kernel height. +* X: Represents the kernel width. +* I8: Indicates that 8 elements of the input channel are processed together. +* O8: Indicates that 8 elements of the output channel are processed together.
+ +## Fusing Convolution and Batch Normalization + +We assume the BatchNorm layer is fused into the Convolution Layer. Fusing BatchNorm into convolution involves incorporating the normalization step directly into the convolution operation. This is achieved by modifying the weights of the convolutional filters to include the scaling and shifting factors. Specifically, the weights are adjusted such that the convolution operation performs the normalization, scaling, and shifting in a single step. + +## Fusing ReLU + +Fusing ReLU into the convolution operation can further optimize the implementation by reducing memory bandwidth requirements and computational overhead. ReLU activation function introduces non-linearity by setting negative values to zero and leaving positive values unchanged. Utilize SIMD instructions to efficiently compute ReLU activation in parallel with convolution. After performing the convolution operation, apply ReLU activation function at vector register level. +We use `aie::set_rounding()` and `aie::set_saturation()` to set the rounding and saturation modes for the computed results in the accumulator. Setting round mode `positive_inf` rounds halfway towards positive infinity, while setting saturation to `aie::saturation_mode::saturate` clamps results to a uint8 range (0, 255). + +``` +::aie::set_saturation( + aie::saturation_mode::saturate); // Needed to saturate properly to uint8 +::aie::set_rounding( + aie::rounding_mode::positive_inf); // Needed to saturate properly to uint8 +``` +After convolution and ReLU fusion, the output data is generated in YCXC8 layout. Ensure that the output data layout is compatible with subsequent layers or processing steps in the neural network architecture.
+ +## Compilation +To compile the design: +``` +make +``` + +To run the design: +``` +make run_py +``` + +### Prerequisites + +To install the dependencies, run the following command: +``` +pip install -r requirements.txt + +``` + +## References +[1] +He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770-778). + diff --git a/programming_examples/ml/resnet/layers_conv2_x/CMakeLists.txt b/programming_examples/ml/resnet/layers_conv2_x/CMakeLists.txt new file mode 100755 index 0000000000..4b897cb29c --- /dev/null +++ b/programming_examples/ml/resnet/layers_conv2_x/CMakeLists.txt @@ -0,0 +1,89 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. + +# parameters +# -DBOOST_ROOT: Path to Boost install +# -DOpenCV_DIR: Path to OpenCV install +# -DXRT_INC_DIR: Full path to src/runtime_src/core/include in XRT cloned repo +# -DXRT_LIB_DIR: Path to xrt_coreutil.lib +# -DTARGET_NAME: Target name to be built + +# cmake needs this line +cmake_minimum_required(VERSION 3.1) + +find_program(WSL NAMES powershell.exe) + +if (NOT WSL) + set(BOOST_ROOT /usr/include/boost CACHE STRING "Path to Boost install") + set(OpenCV_DIR /usr/include/opencv4 CACHE STRING "Path to OpenCV install") + set(XRT_INC_DIR /opt/xilinx/xrt/include CACHE STRING "Path to XRT cloned repo") + set(XRT_LIB_DIR /opt/xilinx/xrt/lib CACHE STRING "Path to xrt_coreutil.lib") +else() + set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") + set(OpenCV_DIR C:/Technical/thirdParty/opencv/build CACHE STRING "Path to OpenCV install") + set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") + set(XRT_LIB_DIR 
C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") +endif () + +set(EDGEDETECT_WIDTH 1920 CACHE STRING "image width") +set(EDGEDETECT_HEIGHT 1080 CACHE STRING "image height") + +set(TARGET_NAME test CACHE STRING "Target to be built") + +SET (ProjectName ${TARGET_NAME}) +SET (currentTarget ${TARGET_NAME}) + +if ( WSL ) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}) +endif () + +project(${ProjectName}) + +# Find packages +find_package(Boost REQUIRED) +find_package(OpenCV REQUIRED) +message("opencv library paht: ${OpenCV_LIB_PATH}") +message("opencv libs: ${OpenCV_LIBS}") + + +add_executable(${currentTarget} + ${CMAKE_CURRENT_SOURCE_DIR}/../../../utils/OpenCVUtils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../../utils/xrtUtils.cpp + test.cpp +) + +target_compile_definitions(${currentTarget} PUBLIC + EDGEDETECT_WIDTH=${EDGEDETECT_WIDTH} + EDGEDETECT_HEIGHT=${EDGEDETECT_HEIGHT} + DISABLE_ABI_CHECK=1 + ) + +target_include_directories (${currentTarget} PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/../../../utils + ${XRT_INC_DIR} + ${OpenCV_INCLUDE_DIRS} + ${Boost_INCLUDE_DIRS} +) + +target_link_directories(${currentTarget} PUBLIC + ${XRT_LIB_DIR} + ${OpenCV_LIB_PATH} + ${Boost_LIBRARY_DIRS} +) + +if (NOT WSL) + target_link_libraries(${currentTarget} PUBLIC + xrt_coreutil + ${OpenCV_LIBS} + boost_program_options + boost_filesystem + ) +else() + target_link_libraries(${currentTarget} PUBLIC + xrt_coreutil + ${OpenCV_LIBS} + ) +endif() diff --git a/programming_examples/ml/resnet/layers_conv2_x/Makefile b/programming_examples/ml/resnet/layers_conv2_x/Makefile new file mode 100755 index 0000000000..2f978a05ba --- /dev/null +++ b/programming_examples/ml/resnet/layers_conv2_x/Makefile @@ -0,0 +1,50 @@ +##===- Makefile -----------------------------------------------------------===## +# +# This file licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +##===----------------------------------------------------------------------===## + +include ../../../makefile-common + +mlirFileName = aie + +all: build/conv2dk1_i8.o build/conv2dk1_skip_init.o build/conv2dk3.o build/conv2dk1_skip.o build/conv2dk1_ui8.o build/final.xclbin + +# build/${mlirFileName}.mlir: aie2.py +# mkdir -p ${@D} +# python3 $< > $@ + +build/${mlirFileName}.mlir: aie.mlir + mkdir -p ${@D} + cp $< $@ +insts.txt: build/${mlirFileName}.mlir + aiecc.py -v --aie-only-generate-ipu --ipu-insts-name=$@ $< + +build/conv2dk1_i8.o: ../../../../aie_kernels/aie2/conv2dk1.cc + xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ + +build/conv2dk3.o: ../../../../aie_kernels/aie2/conv2dk3.cc + xchesscc -d ${CHESSCC2_FLAGS} -DUINT8_ACT -c $< -o $@ + +build/conv2dk1_skip_init.o: ../../../../aie_kernels/aie2/conv2dk1_skip_init.cc + xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ + +build/conv2dk1_ui8.o: ../../../../aie_kernels/aie2/conv2dk1.cc + xchesscc -d ${CHESSCC2_FLAGS} -DUINT8_ACT -c $< -o $@ + +build/conv2dk1_skip.o: ../../../../aie_kernels/aie2/conv2dk1_skip.cc + xchesscc -d ${CHESSCC2_FLAGS} -DUINT8_ACT -c $< -o $@ + +build/final.xclbin: build/${mlirFileName}.mlir + cd build && aiecc.py -v --aie-generate-cdo --aie-generate-ipu --no-compile-host \ + --xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) + +clean: + rm -rf build *.elf* *.lst *.bif log* ${mlirFileName}.mlir.prj *.xclbin sim \ + chess* *.o insts.txt \ + *.log aie_partition.json *.bin BOOT.BIN _x test.exe + +run_py: + ${powershell} python3 test.py diff --git a/programming_examples/ml/resnet/layers_conv2_x/aie.mlir b/programming_examples/ml/resnet/layers_conv2_x/aie.mlir new file mode 100755 index 0000000000..ccc04efb9a --- /dev/null +++ b/programming_examples/ml/resnet/layers_conv2_x/aie.mlir @@ -0,0 +1,1014 @@ +//===- aie.mlir ------------------------------------------------*- MLIR -*-===// +// +// Copyright (C) 2024, 
Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +module { +aie.device(ipu) { + + //shim + %tile00 = aie.tile(0, 0) + %tile10 = aie.tile(1, 0) + %tile20 = aie.tile(2, 0) + + //memtiles + %tile01 = aie.tile(0, 1) + %tile11 = aie.tile(1, 1) + %tile21 = aie.tile(2, 1) + + %tile02 = aie.tile(0, 2) + %tile03 = aie.tile(0, 3) + %tile04 = aie.tile(0, 5) + %tile05 = aie.tile(0, 4) + + %tile12 = aie.tile(1, 2) + %tile13 = aie.tile(1, 3) + %tile14 = aie.tile(1, 4) + %tile15 = aie.tile(1, 5) + + %tile22 = aie.tile(2, 2) + %tile23 = aie.tile(2, 3) + %tile24 = aie.tile(2, 4) + %tile25 = aie.tile(2, 5) + //Trace: add flow + aie.flow(%tile24, "Trace" : 0, %tile00, "DMA" : 1) + + %rtp2 = aie.buffer(%tile02) {sym_name = "rtp2"} : memref<16xi32> + %rtp3 = aie.buffer(%tile03) {sym_name = "rtp3"} : memref<16xi32> + %rtp4 = aie.buffer(%tile04) {sym_name = "rtp4"} : memref<16xi32> + %rtp5 = aie.buffer(%tile05) {sym_name = "rtp5"} : memref<16xi32> + + %rtp12 = aie.buffer(%tile12) {sym_name = "rtp12"} : memref<16xi32> + %rtp13 = aie.buffer(%tile13) {sym_name = "rtp13"} : memref<16xi32> + %rtp14 = aie.buffer(%tile14) {sym_name = "rtp14"} : memref<16xi32> + %rtp15 = aie.buffer(%tile15) {sym_name = "rtp15"} : memref<16xi32> + + %rtp22 = aie.buffer(%tile22) {sym_name = "rtp22"} : memref<16xi32> + %rtp23 = aie.buffer(%tile23) {sym_name = "rtp23"} : memref<16xi32> + %rtp24 = aie.buffer(%tile24) {sym_name = "rtp24"} : memref<16xi32> + %rtp25 = aie.buffer(%tile25) {sym_name = "rtp25"} : memref<16xi32> + + // ___________________________Bottleneck 1___________________________ + //initial activation for 1x1 + aie.objectfifo @inOF_act_L3L2(%tile00, {%tile02,%tile01},[2,2,4]): !aie.objectfifo> // from shim broadcast to core2 and memtile + aie.objectfifo @skip_buf(%tile01, {%tile05}, 2: i32): !aie.objectfifo> // link the skip buffer in memtile to conv1_skip in tile4 + 
aie.objectfifo.link[@inOF_act_L3L2]-> [@skip_buf] () + + //wts + aie.objectfifo @inOF_wts_0_L3L2(%tile00, {%tile01}, 1 : i32) : !aie.objectfifo> // total buffer for weights + aie.objectfifo @wts_buf_00(%tile01, {%tile02}, 1 : i32) : !aie.objectfifo> // L1 buffer for first conv1x1 weights 256x64x1x1= 16384 + aie.objectfifo @wts_buf_01(%tile01, {%tile03,%tile04}, 1 : i32) : !aie.objectfifo> // L1 buffer for middle conv3x3 weights 64x64x3x3= 36864 + aie.objectfifo @wts_buf_02(%tile01, {%tile05}, 1 : i32) : !aie.objectfifo> // L1 buffer for final conv1x1 weights 64x256x1x1= 16384 + aie.objectfifo.link[@inOF_wts_0_L3L2]-> [@wts_buf_00,@wts_buf_01,@wts_buf_02] () + + // OF for intermediate ofm between 1x1 and 3x3 + aie.objectfifo @act_2_3_4(%tile02, {%tile03,%tile04}, 4 : i32) : !aie.objectfifo> //32x1x32 + // OF for intermediate ofm between 3x3 and 1x1 + aie.objectfifo @act_3_5(%tile03, {%tile05}, 2 : i32) : !aie.objectfifo> //32x1x32 + aie.objectfifo @act_4_5(%tile04, {%tile05}, 2 : i32) : !aie.objectfifo> //32x1x32 + + // ___________________________Bottleneck 2___________________________ + //wts + aie.objectfifo @inOF_wts_1_L3L2(%tile10, {%tile11}, 1 : i32) : !aie.objectfifo> // total buffer for weights + aie.objectfifo @wts_buf_10(%tile11, {%tile15}, 1 : i32) : !aie.objectfifo> // L1 buffer for first conv1x1 weights 256x64x1x1= 16384 + aie.objectfifo @wts_buf_11(%tile11, {%tile12,%tile14}, 1 : i32) : !aie.objectfifo> // L1 buffer for middle conv3x3 weights 64x64x3x3= 36864 + aie.objectfifo @wts_buf_12(%tile11, {%tile13}, 1 : i32) : !aie.objectfifo> // L1 buffer for final conv1x1 weights 64x256x1x1= 16384 + aie.objectfifo.link[@inOF_wts_1_L3L2]-> [@wts_buf_10,@wts_buf_11,@wts_buf_12] () + + //initial activation for 1x1 + aie.objectfifo @act_05_15(%tile05, {%tile15,%tile01},[2,2,4]): !aie.objectfifo> // from shim broadcast to core2 and memtile + aie.objectfifo @skip_buf2(%tile01, {%tile13}, 2: i32): !aie.objectfifo> // link the skip buffer in memtile to conv1_skip in 
tile4 + aie.objectfifo.link[@act_05_15]-> [@skip_buf2] () + + // OF for intermediate ofm between 1x1 and 3x3 + aie.objectfifo @act_15_12_14(%tile15, {%tile12,%tile14}, 4 : i32) : !aie.objectfifo> //32x1x32 + + // OF for intermediate ofm between 3x3 and 1x1 + aie.objectfifo @act_12_13(%tile12, {%tile13}, 2 : i32) : !aie.objectfifo> //32x1x32 + aie.objectfifo @act_14_13(%tile14, {%tile13}, 2 : i32) : !aie.objectfifo> //32x1x32 + + + // ___________________________Bottleneck 3___________________________ + //wts + aie.objectfifo @inOF_wts_2_L3L2(%tile20, {%tile21}, 1 : i32) : !aie.objectfifo> // total buffer for weights + aie.objectfifo @wts_buf_20(%tile21, {%tile22}, 1 : i32) : !aie.objectfifo> // L1 buffer for first conv1x1 weights 256x64x1x1= 16384 + aie.objectfifo @wts_buf_21(%tile21, {%tile23,%tile25}, 1 : i32) : !aie.objectfifo> // L1 buffer for middle conv3x3 weights 64x64x3x3= 36864 + aie.objectfifo @wts_buf_22(%tile21, {%tile24}, 1 : i32) : !aie.objectfifo> // L1 buffer for final conv1x1 weights 64x256x1x1= 16384 + aie.objectfifo.link[@inOF_wts_2_L3L2]-> [@wts_buf_20,@wts_buf_21,@wts_buf_22] () + + //initial activation for 1x1 + aie.objectfifo @act_13_22(%tile13, {%tile22,%tile21},[2,2,4]): !aie.objectfifo> // from shim broadcast to core2 and memtile + aie.objectfifo @skip_buf3(%tile21, {%tile24}, 2: i32): !aie.objectfifo> // link the skip buffer in memtile to conv1_skip in tile4 + aie.objectfifo.link[@act_13_22]-> [@skip_buf3] () + + // OF for intermediate ofm between 1x1 and 3x3 + aie.objectfifo @act_22_23_25(%tile22, {%tile23,%tile25}, 4 : i32) : !aie.objectfifo> //32x1x32 + + // OF for intermediate ofm between 3x3 and 1x1 + aie.objectfifo @act_23_24(%tile23, {%tile24}, 2 : i32) : !aie.objectfifo> //32x1x32 + aie.objectfifo @act_25_24(%tile25, {%tile24}, 2 : i32) : !aie.objectfifo> //32x1x32 + + // Final output OF + aie.objectfifo @outOFL2L3(%tile24, {%tile10}, 2 : i32) : !aie.objectfifo> //32x1x64 + + // ___________________________Kernel 
Call___________________________ + func.func private @conv2dk1_i8(memref<32x1x64xi8>, memref<4096xi8>, memref<32x1x64xui8>,i32,i32,i32,i32) -> () + func.func private @conv2dk3_ui8(memref<32x1x64xui8>,memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + func.func private @conv2dk1_skip_init_i8(memref<32x1x32xui8>,memref<32x1x32xui8>, memref<32768xi8>,memref<32x1x256xui8>,memref<32x1x64xi8>,i32,i32,i32,i32,i32,i32,i32) -> () + + func.func private @conv2dk1_ui8(memref<32x1x256xui8>, memref<16384xi8>, memref<32x1x64xui8>,i32,i32,i32,i32) -> () + func.func private @conv2dk1_skip_ui8(memref<32x1x32xui8>,memref<32x1x32xui8>, memref<16384xi8>,memref<32x1x256xui8>,memref<32x1x256xui8>,i32,i32,i32,i32,i32) -> () + // ___________________________Bottleneck 1___________________________ + // 1x1 conv + aie.core(%tile02) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + + %x_dim = arith.constant 32 : i32 + %y_dim = arith.constant 32 : index + + %ci = arith.constant 64 : i32 + %co = arith.constant 64 : i32 + + %intmax = arith.constant 0xFFFFFFFF : index + scf.for %arg3 = %c0 to %intmax step %c1 { + // acquire wts once + %subviewWts = aie.objectfifo.acquire @wts_buf_00(Consume, 1) : !aie.objectfifosubview> + %elemWts = aie.objectfifo.subview.access %subviewWts[0] : !aie.objectfifosubview> -> memref<4096xi8> + %scale = memref.load %rtp2[%c0] : memref<16xi32> + + scf.for %n = %c0 to %y_dim step %c1 { + %subviewIn = aie.objectfifo.acquire @inOF_act_L3L2(Consume, 1) : !aie.objectfifosubview> + %elemIn = aie.objectfifo.subview.access %subviewIn[0] : !aie.objectfifosubview> -> memref<32x1x64xi8> + + %subviewOut = aie.objectfifo.acquire @act_2_3_4(Produce, 1) : !aie.objectfifosubview> + %elemOut0 = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + + func.call @conv2dk1_i8(%elemIn,%elemWts, %elemOut0,%x_dim,%ci,%co,%scale) : 
(memref<32x1x64xi8>,memref<4096xi8>, memref<32x1x64xui8>,i32,i32,i32,i32) -> () + + aie.objectfifo.release @inOF_act_L3L2(Consume, 1) + aie.objectfifo.release @act_2_3_4(Produce, 1) + + } + aie.objectfifo.release @wts_buf_00(Consume, 1) + } + aie.end + } { link_with="conv2dk1_i8.o" } + + // 3x3 conv + aie.core(%tile03) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + + %x_dim = arith.constant 32 : i32 + %y_dim_minus_2 = arith.constant 30 : index + + %ci = arith.constant 64 : i32 + %co = arith.constant 32 : i32 + + %kx_dim = arith.constant 3 : i32 + %ky_dim = arith.constant 3 : i32 + + %top = arith.constant 0 : i32 + %middle = arith.constant 1 : i32 + %bottom = arith.constant 2 : i32 + + %co_offset = arith.constant 0 : i32 + // acquire wts once + // %subviewWts = aie.objectfifo.acquire(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) : !aie.objectfifosubview> + // %scale = memref.load %rtp3[%c0] : memref<16xi32> + + %scale = arith.constant 1 : i32 + %intmax = arith.constant 0xFFFFFFFF : index + scf.for %arg3 = %c0 to %intmax step %c1 { + %subviewWts = aie.objectfifo.acquire @wts_buf_01(Consume, 1) : !aie.objectfifosubview> + %elemWts = aie.objectfifo.subview.access %subviewWts[0] : !aie.objectfifosubview> -> memref<36864xi8> + + // Preamble : Top Border + + %subviewIn = aie.objectfifo.acquire @act_2_3_4(Consume, 2) : !aie.objectfifosubview> + %elemIn0 = aie.objectfifo.subview.access %subviewIn[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1 = aie.objectfifo.subview.access %subviewIn[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut = aie.objectfifo.acquire @act_3_5(Produce, 1) : !aie.objectfifosubview> + %elemOut = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + + + func.call @conv2dk3_ui8(%elemIn0,%elemIn0,%elemIn1,%elemWts, %elemOut,%x_dim,%ci,%co,%kx_dim,%ky_dim,%top,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, 
memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + + aie.objectfifo.release @act_3_5(Produce, 1) + + // Middle + scf.for %n = %c0 to %y_dim_minus_2 step %c1 { + %subviewIn1 = aie.objectfifo.acquire @act_2_3_4(Consume, 3) : !aie.objectfifosubview> + %elemIn1_0 = aie.objectfifo.subview.access %subviewIn1[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1_1 = aie.objectfifo.subview.access %subviewIn1[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1_2 = aie.objectfifo.subview.access %subviewIn1[2] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut1 = aie.objectfifo.acquire @act_3_5(Produce, 1) : !aie.objectfifosubview> + %elemOut1 = aie.objectfifo.subview.access %subviewOut1[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + func.call @conv2dk3_ui8(%elemIn1_0,%elemIn1_1,%elemIn1_2,%elemWts, %elemOut1,%x_dim,%ci,%co,%kx_dim,%ky_dim,%middle,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + aie.objectfifo.release @act_3_5(Produce, 1) + aie.objectfifo.release @act_2_3_4(Consume, 1) + + } + // Postamble : Bottom Border + %subviewIn2 = aie.objectfifo.acquire @act_2_3_4(Consume, 2) : !aie.objectfifosubview> + %elemIn2_0 = aie.objectfifo.subview.access %subviewIn2[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn2_1 = aie.objectfifo.subview.access %subviewIn2[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut2 = aie.objectfifo.acquire @act_3_5(Produce, 1) : !aie.objectfifosubview> + %elemOut2 = aie.objectfifo.subview.access %subviewOut2[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + + func.call @conv2dk3_ui8(%elemIn2_0,%elemIn2_1,%elemIn2_1,%elemWts, %elemOut2,%x_dim,%ci,%co,%kx_dim,%ky_dim,%bottom,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) 
-> () + + + aie.objectfifo.release @act_3_5(Produce, 1) + aie.objectfifo.release @act_2_3_4(Consume, 2) + + //release weights + aie.objectfifo.release @wts_buf_01(Consume, 1) + } + // aie.objectfifo.release(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) + aie.end + } { link_with="conv2dk3.o" } + + // 3x3 conv + aie.core(%tile04) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + + %x_dim = arith.constant 32 : i32 + %y_dim_minus_2 = arith.constant 30 : index + + %ci = arith.constant 64 : i32 + %co = arith.constant 32 : i32 + + %kx_dim = arith.constant 3 : i32 + %ky_dim = arith.constant 3 : i32 + + %top = arith.constant 0 : i32 + %middle = arith.constant 1 : i32 + %bottom = arith.constant 2 : i32 + + %co_offset = arith.constant 32 : i32 + %intmax = arith.constant 0xFFFFFFFF : index + // %scale = memref.load %rtp4[%c0] : memref<16xi32> + %scale = arith.constant 1 : i32 + scf.for %arg3 = %c0 to %intmax step %c1 { + // acquire wts once + // %subviewWts = aie.objectfifo.acquire(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) : !aie.objectfifosubview> + %subviewWts = aie.objectfifo.acquire @wts_buf_01(Consume, 1) : !aie.objectfifosubview> + %elemWts = aie.objectfifo.subview.access %subviewWts[0] : !aie.objectfifosubview> -> memref<36864xi8> + + // Preamble : Top Border + + %subviewIn = aie.objectfifo.acquire @act_2_3_4(Consume, 2) : !aie.objectfifosubview> + %elemIn0 = aie.objectfifo.subview.access %subviewIn[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1 = aie.objectfifo.subview.access %subviewIn[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut = aie.objectfifo.acquire @act_4_5(Produce, 1) : !aie.objectfifosubview> + %elemOut = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + + func.call @conv2dk3_ui8(%elemIn0,%elemIn0,%elemIn1,%elemWts, %elemOut,%x_dim,%ci,%co,%kx_dim,%ky_dim,%top,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, 
memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + + aie.objectfifo.release @act_4_5(Produce, 1) + + // Middle + scf.for %n = %c0 to %y_dim_minus_2 step %c1 { + %subviewIn1 = aie.objectfifo.acquire @act_2_3_4(Consume, 3) : !aie.objectfifosubview> + %elemIn1_0 = aie.objectfifo.subview.access %subviewIn1[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1_1 = aie.objectfifo.subview.access %subviewIn1[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1_2 = aie.objectfifo.subview.access %subviewIn1[2] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut1 = aie.objectfifo.acquire @act_4_5(Produce, 1) : !aie.objectfifosubview> + %elemOut1 = aie.objectfifo.subview.access %subviewOut1[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + func.call @conv2dk3_ui8(%elemIn1_0,%elemIn1_1,%elemIn1_2,%elemWts, %elemOut1,%x_dim,%ci,%co,%kx_dim,%ky_dim,%middle,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + aie.objectfifo.release @act_4_5(Produce, 1) + aie.objectfifo.release @act_2_3_4(Consume, 1) + + } + // Postamble : Bottom Border + %subviewIn2 = aie.objectfifo.acquire @act_2_3_4(Consume, 2) : !aie.objectfifosubview> + %elemIn2_0 = aie.objectfifo.subview.access %subviewIn2[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn2_1 = aie.objectfifo.subview.access %subviewIn2[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut2 = aie.objectfifo.acquire @act_4_5(Produce, 1) : !aie.objectfifosubview> + %elemOut2 = aie.objectfifo.subview.access %subviewOut2[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + + func.call @conv2dk3_ui8(%elemIn2_0,%elemIn2_1,%elemIn2_1,%elemWts, %elemOut2,%x_dim,%ci,%co,%kx_dim,%ky_dim,%bottom,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) 
-> () + + + aie.objectfifo.release @act_4_5(Produce, 1) + aie.objectfifo.release @act_2_3_4(Consume, 2) + + //release weights + aie.objectfifo.release @wts_buf_01(Consume, 1) + // aie.objectfifo.release(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) + } + aie.end + + } { link_with="conv2dk3.o" } + + // 1x1 conv with skip + aie.core(%tile05) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + + %x_dim = arith.constant 32 : i32 + %y_dim = arith.constant 32 : index + + %ci = arith.constant 64 : i32 + %co = arith.constant 256 : i32 + %ci_skip = arith.constant 64 : i32 + + %intmax = arith.constant 0xFFFFFFFF : index + scf.for %arg3 = %c0 to %intmax step %c1 { + // acquire wts once + %subviewWts = aie.objectfifo.acquire @wts_buf_02(Consume, 1) : !aie.objectfifosubview> + %elemWts = aie.objectfifo.subview.access %subviewWts[0] : !aie.objectfifosubview> -> memref<32768xi8> + + %scale = memref.load %rtp5[%c0] : memref<16xi32> + %skip_scale = memref.load %rtp5[%c1] : memref<16xi32> + %skip_conv_scale = memref.load %rtp5[%c2] : memref<16xi32> + + // %skip_scale = arith.constant 0 : i32 + scf.for %n = %c0 to %y_dim step %c1 { + %subviewIn0 = aie.objectfifo.acquire @act_3_5(Consume, 1) : !aie.objectfifosubview> + %elemIn0 = aie.objectfifo.subview.access %subviewIn0[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + %subviewIn1 = aie.objectfifo.acquire @act_4_5(Consume, 1) : !aie.objectfifosubview> + %elemIn1 = aie.objectfifo.subview.access %subviewIn1[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + %subviewOut = aie.objectfifo.acquire @act_05_15(Produce, 1) : !aie.objectfifosubview> + %elemOut0 = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview> -> memref<32x1x256xui8> + + %subviewSkip = aie.objectfifo.acquire @skip_buf(Consume, 1) : !aie.objectfifosubview> + %elemSkip = aie.objectfifo.subview.access %subviewSkip[0] : !aie.objectfifosubview> -> memref<32x1x64xi8> + + + // %skip_scale = arith.constant 
0 : i32 + func.call @conv2dk1_skip_init_i8(%elemIn0,%elemIn1,%elemWts, %elemOut0,%elemSkip,%x_dim,%ci,%co,%ci_skip,%scale,%skip_scale,%skip_conv_scale) : (memref<32x1x32xui8>,memref<32x1x32xui8>, memref<32768xi8>,memref<32x1x256xui8>,memref<32x1x64xi8>,i32,i32,i32,i32,i32,i32,i32) -> () + + aie.objectfifo.release @act_05_15(Produce, 1) + aie.objectfifo.release @act_3_5(Consume, 1) + aie.objectfifo.release @act_4_5(Consume, 1) + aie.objectfifo.release @skip_buf(Consume, 1) + + } + aie.objectfifo.release @wts_buf_02(Consume, 1) + } + aie.end + } { link_with="conv2dk1_skip_init.o" } + // ___________________________Bottleneck 2___________________________ + // 1x1 conv + aie.core(%tile15) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + + %x_dim = arith.constant 32 : i32 + %y_dim = arith.constant 32 : index + + %ci = arith.constant 256 : i32 + %co = arith.constant 64 : i32 + + %intmax = arith.constant 0xFFFFFFFF : index + scf.for %arg3 = %c0 to %intmax step %c1 { + // acquire wts once + %subviewWts = aie.objectfifo.acquire @wts_buf_10(Consume, 1) : !aie.objectfifosubview> + %elemWts = aie.objectfifo.subview.access %subviewWts[0] : !aie.objectfifosubview> -> memref<16384xi8> + %scale = memref.load %rtp15[%c0] : memref<16xi32> + + scf.for %n = %c0 to %y_dim step %c1 { + %subviewIn = aie.objectfifo.acquire @act_05_15(Consume, 1) : !aie.objectfifosubview> + %elemIn = aie.objectfifo.subview.access %subviewIn[0] : !aie.objectfifosubview> -> memref<32x1x256xui8> + + %subviewOut = aie.objectfifo.acquire @act_15_12_14(Produce, 1) : !aie.objectfifosubview> + %elemOut0 = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + + func.call @conv2dk1_ui8(%elemIn,%elemWts, %elemOut0,%x_dim,%ci,%co,%scale) : (memref<32x1x256xui8>,memref<16384xi8>, memref<32x1x64xui8>,i32,i32,i32,i32) -> () + + aie.objectfifo.release @act_05_15(Consume, 1) + aie.objectfifo.release @act_15_12_14(Produce, 1) + + } + aie.objectfifo.release 
@wts_buf_10(Consume, 1) + } + aie.end + } { link_with="conv2dk1_ui8.o" } + + // 3x3 conv + aie.core(%tile12) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + + %x_dim = arith.constant 32 : i32 + %y_dim_minus_2 = arith.constant 30 : index + + %ci = arith.constant 64 : i32 + %co = arith.constant 32 : i32 + + %kx_dim = arith.constant 3 : i32 + %ky_dim = arith.constant 3 : i32 + + %top = arith.constant 0 : i32 + %middle = arith.constant 1 : i32 + %bottom = arith.constant 2 : i32 + + %co_offset = arith.constant 0 : i32 + // acquire wts once + // %subviewWts = aie.objectfifo.acquire(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) : !aie.objectfifosubview> + // %scale = memref.load %rtp3[%c0] : memref<16xi32> + + %scale = arith.constant 1 : i32 + %intmax = arith.constant 0xFFFFFFFF : index + scf.for %arg3 = %c0 to %intmax step %c1 { + %subviewWts = aie.objectfifo.acquire @wts_buf_11(Consume, 1) : !aie.objectfifosubview> + %elemWts = aie.objectfifo.subview.access %subviewWts[0] : !aie.objectfifosubview> -> memref<36864xi8> + + // Preamble : Top Border + + %subviewIn = aie.objectfifo.acquire @act_15_12_14(Consume, 2) : !aie.objectfifosubview> + %elemIn0 = aie.objectfifo.subview.access %subviewIn[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1 = aie.objectfifo.subview.access %subviewIn[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut = aie.objectfifo.acquire @act_12_13(Produce, 1) : !aie.objectfifosubview> + %elemOut = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + + + func.call @conv2dk3_ui8(%elemIn0,%elemIn0,%elemIn1,%elemWts, %elemOut,%x_dim,%ci,%co,%kx_dim,%ky_dim,%top,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + + aie.objectfifo.release @act_12_13(Produce, 1) + + // Middle + scf.for %n = %c0 to %y_dim_minus_2 step %c1 { + %subviewIn1 = 
aie.objectfifo.acquire @act_15_12_14(Consume, 3) : !aie.objectfifosubview> + %elemIn1_0 = aie.objectfifo.subview.access %subviewIn1[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1_1 = aie.objectfifo.subview.access %subviewIn1[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1_2 = aie.objectfifo.subview.access %subviewIn1[2] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut1 = aie.objectfifo.acquire @act_12_13(Produce, 1) : !aie.objectfifosubview> + %elemOut1 = aie.objectfifo.subview.access %subviewOut1[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + func.call @conv2dk3_ui8(%elemIn1_0,%elemIn1_1,%elemIn1_2,%elemWts, %elemOut1,%x_dim,%ci,%co,%kx_dim,%ky_dim,%middle,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + aie.objectfifo.release @act_12_13(Produce, 1) + aie.objectfifo.release @act_15_12_14(Consume, 1) + + } + // Postamble : Bottom Border + %subviewIn2 = aie.objectfifo.acquire @act_15_12_14(Consume, 2) : !aie.objectfifosubview> + %elemIn2_0 = aie.objectfifo.subview.access %subviewIn2[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn2_1 = aie.objectfifo.subview.access %subviewIn2[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut2 = aie.objectfifo.acquire @act_12_13(Produce, 1) : !aie.objectfifosubview> + %elemOut2 = aie.objectfifo.subview.access %subviewOut2[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + + func.call @conv2dk3_ui8(%elemIn2_0,%elemIn2_1,%elemIn2_1,%elemWts, %elemOut2,%x_dim,%ci,%co,%kx_dim,%ky_dim,%bottom,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + + aie.objectfifo.release @act_12_13(Produce, 1) + aie.objectfifo.release @act_15_12_14(Consume, 2) + + //release weights + aie.objectfifo.release @wts_buf_11(Consume, 1) + } + // 
aie.objectfifo.release(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) + aie.end + } { link_with="conv2dk3.o" } + + // 3x3 conv + aie.core(%tile14) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + + %x_dim = arith.constant 32 : i32 + %y_dim_minus_2 = arith.constant 30 : index + + %ci = arith.constant 64 : i32 + %co = arith.constant 32 : i32 + + %kx_dim = arith.constant 3 : i32 + %ky_dim = arith.constant 3 : i32 + + %top = arith.constant 0 : i32 + %middle = arith.constant 1 : i32 + %bottom = arith.constant 2 : i32 + + %co_offset = arith.constant 32 : i32 + %intmax = arith.constant 0xFFFFFFFF : index + // %scale = memref.load %rtp4[%c0] : memref<16xi32> + %scale = arith.constant 1 : i32 + scf.for %arg3 = %c0 to %intmax step %c1 { + // acquire wts once + // %subviewWts = aie.objectfifo.acquire(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) : !aie.objectfifosubview> + %subviewWts = aie.objectfifo.acquire @wts_buf_11(Consume, 1) : !aie.objectfifosubview> + %elemWts = aie.objectfifo.subview.access %subviewWts[0] : !aie.objectfifosubview> -> memref<36864xi8> + + // Preamble : Top Border + + %subviewIn = aie.objectfifo.acquire @act_15_12_14(Consume, 2) : !aie.objectfifosubview> + %elemIn0 = aie.objectfifo.subview.access %subviewIn[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1 = aie.objectfifo.subview.access %subviewIn[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut = aie.objectfifo.acquire @act_14_13(Produce, 1) : !aie.objectfifosubview> + %elemOut = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + + func.call @conv2dk3_ui8(%elemIn0,%elemIn0,%elemIn1,%elemWts, %elemOut,%x_dim,%ci,%co,%kx_dim,%ky_dim,%top,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + + aie.objectfifo.release @act_14_13(Produce, 1) + + // Middle + scf.for %n = %c0 to %y_dim_minus_2 step %c1 { + 
%subviewIn1 = aie.objectfifo.acquire @act_15_12_14(Consume, 3) : !aie.objectfifosubview> + %elemIn1_0 = aie.objectfifo.subview.access %subviewIn1[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1_1 = aie.objectfifo.subview.access %subviewIn1[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1_2 = aie.objectfifo.subview.access %subviewIn1[2] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut1 = aie.objectfifo.acquire @act_14_13(Produce, 1) : !aie.objectfifosubview> + %elemOut1 = aie.objectfifo.subview.access %subviewOut1[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + func.call @conv2dk3_ui8(%elemIn1_0,%elemIn1_1,%elemIn1_2,%elemWts, %elemOut1,%x_dim,%ci,%co,%kx_dim,%ky_dim,%middle,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + aie.objectfifo.release @act_14_13(Produce, 1) + aie.objectfifo.release @act_15_12_14(Consume, 1) + + } + // Postamble : Bottom Border + %subviewIn2 = aie.objectfifo.acquire @act_15_12_14(Consume, 2) : !aie.objectfifosubview> + %elemIn2_0 = aie.objectfifo.subview.access %subviewIn2[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn2_1 = aie.objectfifo.subview.access %subviewIn2[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut2 = aie.objectfifo.acquire @act_14_13(Produce, 1) : !aie.objectfifosubview> + %elemOut2 = aie.objectfifo.subview.access %subviewOut2[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + + func.call @conv2dk3_ui8(%elemIn2_0,%elemIn2_1,%elemIn2_1,%elemWts, %elemOut2,%x_dim,%ci,%co,%kx_dim,%ky_dim,%bottom,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + + aie.objectfifo.release @act_14_13(Produce, 1) + aie.objectfifo.release @act_15_12_14(Consume, 2) + + //release weights + aie.objectfifo.release @wts_buf_11(Consume, 
1) + // aie.objectfifo.release(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) + } + aie.end + + } { link_with="conv2dk3.o" } + + // 1x1 conv with skip + aie.core(%tile13) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + + %x_dim = arith.constant 32 : i32 + %y_dim = arith.constant 32 : index + + %ci = arith.constant 64 : i32 + %co = arith.constant 256 : i32 + + %intmax = arith.constant 0xFFFFFFFF : index + scf.for %arg3 = %c0 to %intmax step %c1 { + // acquire wts once + %subviewWts = aie.objectfifo.acquire @wts_buf_12(Consume, 1) : !aie.objectfifosubview> + %elemWts = aie.objectfifo.subview.access %subviewWts[0] : !aie.objectfifosubview> -> memref<16384xi8> + + %scale = memref.load %rtp13[%c0] : memref<16xi32> + %skip_scale = memref.load %rtp13[%c1] : memref<16xi32> + // %skip_scale = arith.constant 0 : i32 + scf.for %n = %c0 to %y_dim step %c1 { + %subviewIn0 = aie.objectfifo.acquire @act_12_13(Consume, 1) : !aie.objectfifosubview> + %elemIn0 = aie.objectfifo.subview.access %subviewIn0[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + %subviewIn1 = aie.objectfifo.acquire @act_14_13(Consume, 1) : !aie.objectfifosubview> + %elemIn1 = aie.objectfifo.subview.access %subviewIn1[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + %subviewOut = aie.objectfifo.acquire @act_13_22(Produce, 1) : !aie.objectfifosubview> + %elemOut0 = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview> -> memref<32x1x256xui8> + + %subviewSkip = aie.objectfifo.acquire @skip_buf2(Consume, 1) : !aie.objectfifosubview> + %elemSkip = aie.objectfifo.subview.access %subviewSkip[0] : !aie.objectfifosubview> -> memref<32x1x256xui8> + + + // %skip_scale = arith.constant 0 : i32 + func.call @conv2dk1_skip_ui8(%elemIn0,%elemIn1,%elemWts, %elemOut0,%elemSkip,%x_dim,%ci,%co,%scale,%skip_scale) : (memref<32x1x32xui8>,memref<32x1x32xui8>, memref<16384xi8>,memref<32x1x256xui8>,memref<32x1x256xui8>,i32,i32,i32,i32,i32) -> () + + 
aie.objectfifo.release @act_13_22(Produce, 1) + aie.objectfifo.release @act_12_13(Consume, 1) + aie.objectfifo.release @act_14_13(Consume, 1) + aie.objectfifo.release @skip_buf2(Consume, 1) + + } + aie.objectfifo.release @wts_buf_12(Consume, 1) + } + aie.end + } { link_with="conv2dk1_skip.o" } + + + // ___________________________Bottleneck 3___________________________ + // 1x1 conv + aie.core(%tile22) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + + %x_dim = arith.constant 32 : i32 + %y_dim = arith.constant 32 : index + + %ci = arith.constant 256 : i32 + %co = arith.constant 64 : i32 + + %intmax = arith.constant 0xFFFFFFFF : index + scf.for %arg3 = %c0 to %intmax step %c1 { + // acquire wts once + %subviewWts = aie.objectfifo.acquire @wts_buf_20(Consume, 1) : !aie.objectfifosubview> + %elemWts = aie.objectfifo.subview.access %subviewWts[0] : !aie.objectfifosubview> -> memref<16384xi8> + %scale = memref.load %rtp22[%c0] : memref<16xi32> + + scf.for %n = %c0 to %y_dim step %c1 { + %subviewIn = aie.objectfifo.acquire @act_13_22(Consume, 1) : !aie.objectfifosubview> + %elemIn = aie.objectfifo.subview.access %subviewIn[0] : !aie.objectfifosubview> -> memref<32x1x256xui8> + + %subviewOut = aie.objectfifo.acquire @act_22_23_25(Produce, 1) : !aie.objectfifosubview> + %elemOut0 = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + + func.call @conv2dk1_ui8(%elemIn,%elemWts, %elemOut0,%x_dim,%ci,%co,%scale) : (memref<32x1x256xui8>,memref<16384xi8>, memref<32x1x64xui8>,i32,i32,i32,i32) -> () + + aie.objectfifo.release @act_13_22(Consume, 1) + aie.objectfifo.release @act_22_23_25(Produce, 1) + + } + aie.objectfifo.release @wts_buf_20(Consume, 1) + } + aie.end + } { link_with="conv2dk1_ui8.o" } + + // 3x3 conv + aie.core(%tile23) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + + %x_dim = arith.constant 32 : i32 + %y_dim_minus_2 = arith.constant 30 : index + + %ci = arith.constant 64 : i32 
+ %co = arith.constant 32 : i32 + + %kx_dim = arith.constant 3 : i32 + %ky_dim = arith.constant 3 : i32 + + %top = arith.constant 0 : i32 + %middle = arith.constant 1 : i32 + %bottom = arith.constant 2 : i32 + + %co_offset = arith.constant 0 : i32 + // acquire wts once + // %subviewWts = aie.objectfifo.acquire(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) : !aie.objectfifosubview> + // %scale = memref.load %rtp3[%c0] : memref<16xi32> + + %scale = arith.constant 1 : i32 + %intmax = arith.constant 0xFFFFFFFF : index + scf.for %arg3 = %c0 to %intmax step %c1 { + %subviewWts = aie.objectfifo.acquire @wts_buf_21(Consume, 1) : !aie.objectfifosubview> + %elemWts = aie.objectfifo.subview.access %subviewWts[0] : !aie.objectfifosubview> -> memref<36864xi8> + + // Preamble : Top Border + + %subviewIn = aie.objectfifo.acquire @act_22_23_25(Consume, 2) : !aie.objectfifosubview> + %elemIn0 = aie.objectfifo.subview.access %subviewIn[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1 = aie.objectfifo.subview.access %subviewIn[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut = aie.objectfifo.acquire @act_23_24(Produce, 1) : !aie.objectfifosubview> + %elemOut = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + + + func.call @conv2dk3_ui8(%elemIn0,%elemIn0,%elemIn1,%elemWts, %elemOut,%x_dim,%ci,%co,%kx_dim,%ky_dim,%top,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + + aie.objectfifo.release @act_23_24(Produce, 1) + + // Middle + scf.for %n = %c0 to %y_dim_minus_2 step %c1 { + %subviewIn1 = aie.objectfifo.acquire @act_22_23_25(Consume, 3) : !aie.objectfifosubview> + %elemIn1_0 = aie.objectfifo.subview.access %subviewIn1[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1_1 = aie.objectfifo.subview.access %subviewIn1[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1_2 = 
aie.objectfifo.subview.access %subviewIn1[2] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut1 = aie.objectfifo.acquire @act_23_24(Produce, 1) : !aie.objectfifosubview> + %elemOut1 = aie.objectfifo.subview.access %subviewOut1[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + func.call @conv2dk3_ui8(%elemIn1_0,%elemIn1_1,%elemIn1_2,%elemWts, %elemOut1,%x_dim,%ci,%co,%kx_dim,%ky_dim,%middle,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + aie.objectfifo.release @act_23_24(Produce, 1) + aie.objectfifo.release @act_22_23_25(Consume, 1) + + } + // Postamble : Bottom Border + %subviewIn2 = aie.objectfifo.acquire @act_22_23_25(Consume, 2) : !aie.objectfifosubview> + %elemIn2_0 = aie.objectfifo.subview.access %subviewIn2[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn2_1 = aie.objectfifo.subview.access %subviewIn2[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut2 = aie.objectfifo.acquire @act_23_24(Produce, 1) : !aie.objectfifosubview> + %elemOut2 = aie.objectfifo.subview.access %subviewOut2[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + + func.call @conv2dk3_ui8(%elemIn2_0,%elemIn2_1,%elemIn2_1,%elemWts, %elemOut2,%x_dim,%ci,%co,%kx_dim,%ky_dim,%bottom,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + + aie.objectfifo.release @act_23_24(Produce, 1) + aie.objectfifo.release @act_22_23_25(Consume, 2) + + //release weights + aie.objectfifo.release @wts_buf_21(Consume, 1) + } + // aie.objectfifo.release(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) + aie.end + } { link_with="conv2dk3.o" } + + // 3x3 conv + aie.core(%tile25) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + + %x_dim = arith.constant 32 : i32 + %y_dim_minus_2 = arith.constant 30 : index + + %ci = 
arith.constant 64 : i32 + %co = arith.constant 32 : i32 + + %kx_dim = arith.constant 3 : i32 + %ky_dim = arith.constant 3 : i32 + + %top = arith.constant 0 : i32 + %middle = arith.constant 1 : i32 + %bottom = arith.constant 2 : i32 + + %co_offset = arith.constant 32 : i32 + %intmax = arith.constant 0xFFFFFFFF : index + // %scale = memref.load %rtp4[%c0] : memref<16xi32> + %scale = arith.constant 1 : i32 + scf.for %arg3 = %c0 to %intmax step %c1 { + // acquire wts once + // %subviewWts = aie.objectfifo.acquire(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) : !aie.objectfifosubview> + %subviewWts = aie.objectfifo.acquire @wts_buf_21(Consume, 1) : !aie.objectfifosubview> + %elemWts = aie.objectfifo.subview.access %subviewWts[0] : !aie.objectfifosubview> -> memref<36864xi8> + + // Preamble : Top Border + + %subviewIn = aie.objectfifo.acquire @act_22_23_25(Consume, 2) : !aie.objectfifosubview> + %elemIn0 = aie.objectfifo.subview.access %subviewIn[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1 = aie.objectfifo.subview.access %subviewIn[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut = aie.objectfifo.acquire @act_25_24(Produce, 1) : !aie.objectfifosubview> + %elemOut = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + + func.call @conv2dk3_ui8(%elemIn0,%elemIn0,%elemIn1,%elemWts, %elemOut,%x_dim,%ci,%co,%kx_dim,%ky_dim,%top,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + + aie.objectfifo.release @act_25_24(Produce, 1) + + // Middle + scf.for %n = %c0 to %y_dim_minus_2 step %c1 { + %subviewIn1 = aie.objectfifo.acquire @act_22_23_25(Consume, 3) : !aie.objectfifosubview> + %elemIn1_0 = aie.objectfifo.subview.access %subviewIn1[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn1_1 = aie.objectfifo.subview.access %subviewIn1[1] : !aie.objectfifosubview> -> 
memref<32x1x64xui8> + %elemIn1_2 = aie.objectfifo.subview.access %subviewIn1[2] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut1 = aie.objectfifo.acquire @act_25_24(Produce, 1) : !aie.objectfifosubview> + %elemOut1 = aie.objectfifo.subview.access %subviewOut1[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + func.call @conv2dk3_ui8(%elemIn1_0,%elemIn1_1,%elemIn1_2,%elemWts, %elemOut1,%x_dim,%ci,%co,%kx_dim,%ky_dim,%middle,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + aie.objectfifo.release @act_25_24(Produce, 1) + aie.objectfifo.release @act_22_23_25(Consume, 1) + + } + // Postamble : Bottom Border + %subviewIn2 = aie.objectfifo.acquire @act_22_23_25(Consume, 2) : !aie.objectfifosubview> + %elemIn2_0 = aie.objectfifo.subview.access %subviewIn2[0] : !aie.objectfifosubview> -> memref<32x1x64xui8> + %elemIn2_1 = aie.objectfifo.subview.access %subviewIn2[1] : !aie.objectfifosubview> -> memref<32x1x64xui8> + + %subviewOut2 = aie.objectfifo.acquire @act_25_24(Produce, 1) : !aie.objectfifosubview> + %elemOut2 = aie.objectfifo.subview.access %subviewOut2[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + + func.call @conv2dk3_ui8(%elemIn2_0,%elemIn2_1,%elemIn2_1,%elemWts, %elemOut2,%x_dim,%ci,%co,%kx_dim,%ky_dim,%bottom,%scale,%co_offset ) : (memref<32x1x64xui8>, memref<32x1x64xui8>, memref<32x1x64xui8>, memref<36864xi8>,memref<32x1x32xui8>,i32,i32,i32,i32,i32,i32,i32,i32) -> () + + + aie.objectfifo.release @act_25_24(Produce, 1) + aie.objectfifo.release @act_22_23_25(Consume, 2) + + //release weights + aie.objectfifo.release @wts_buf_21(Consume, 1) + // aie.objectfifo.release(%inOF_wts_0_L3L2 : !aie.objectfifo>, 1) + } + aie.end + + } { link_with="conv2dk3.o" } + + // 1x1 conv with skip + aie.core(%tile24) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + + %x_dim = 
arith.constant 32 : i32 + %y_dim = arith.constant 32 : index + + %ci = arith.constant 64 : i32 + %co = arith.constant 256 : i32 + + %intmax = arith.constant 0xFFFFFFFF : index + scf.for %arg3 = %c0 to %intmax step %c1 { + // acquire wts once + %subviewWts = aie.objectfifo.acquire @wts_buf_22(Consume, 1) : !aie.objectfifosubview> + %elemWts = aie.objectfifo.subview.access %subviewWts[0] : !aie.objectfifosubview> -> memref<16384xi8> + + %scale = memref.load %rtp24[%c0] : memref<16xi32> + %skip_scale = memref.load %rtp24[%c1] : memref<16xi32> + // %skip_scale = arith.constant 0 : i32 + scf.for %n = %c0 to %y_dim step %c1 { + %subviewIn0 = aie.objectfifo.acquire @act_23_24(Consume, 1) : !aie.objectfifosubview> + %elemIn0 = aie.objectfifo.subview.access %subviewIn0[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + %subviewIn1 = aie.objectfifo.acquire @act_25_24(Consume, 1) : !aie.objectfifosubview> + %elemIn1 = aie.objectfifo.subview.access %subviewIn1[0] : !aie.objectfifosubview> -> memref<32x1x32xui8> + + %subviewOut = aie.objectfifo.acquire @outOFL2L3(Produce, 1) : !aie.objectfifosubview> + %elemOut0 = aie.objectfifo.subview.access %subviewOut[0] : !aie.objectfifosubview> -> memref<32x1x256xui8> + + %subviewSkip = aie.objectfifo.acquire @skip_buf3(Consume, 1) : !aie.objectfifosubview> + %elemSkip = aie.objectfifo.subview.access %subviewSkip[0] : !aie.objectfifosubview> -> memref<32x1x256xui8> + + + // %skip_scale = arith.constant 0 : i32 + func.call @conv2dk1_skip_ui8(%elemIn0,%elemIn1,%elemWts, %elemOut0,%elemSkip,%x_dim,%ci,%co,%scale,%skip_scale) : (memref<32x1x32xui8>,memref<32x1x32xui8>, memref<16384xi8>,memref<32x1x256xui8>,memref<32x1x256xui8>,i32,i32,i32,i32,i32) -> () + + aie.objectfifo.release @outOFL2L3(Produce, 1) + aie.objectfifo.release @act_23_24(Consume, 1) + aie.objectfifo.release @act_25_24(Consume, 1) + aie.objectfifo.release @skip_buf3(Consume, 1) + + } + aie.objectfifo.release @wts_buf_22(Consume, 1) + } + aie.end + } { 
link_with="conv2dk1_skip.o" } + + + func.func @sequence(%in0 : memref<16384xi32>, %wts0 : memref<53248xi32>, %out : memref<65536xi32>) { + // Trace output + + // Trace_Event0, Trace_Event1: Select which events to trace. + // Note that the event buffers only appear to be transferred to DDR in + // bursts of 256 bytes. If less than 256 bytes are written, you may not + // see trace output, or only see it on the next iteration of your + // kernel invocation, as the buffer gets filled up. Note that, even + // though events are encoded as 4 byte words, it may take more than 64 + // events to fill the buffer to 256 bytes and cause a flush, since + // multiple repeating events can be 'compressed' by the trace mechanism. + // In order to always generate sufficient events, we add the "assert + // TRUE" event to one slot, which fires every cycle, and thus fills our + // buffer quickly. + + // Some events: + // TRUE (0x01) + // STREAM_STALL (0x18) + // LOCK_STALL (0x1A) + // EVENTS_CORE_INSTR_EVENT_1 (0x22) + // EVENTS_CORE_INSTR_EVENT_0 (0x21) + // INSTR_VECTOR (0x25) Core executes a vecotr MAC, ADD or compare instruction + // INSTR_LOCK_ACQUIRE_REQ (0x2C) Core executes a lock acquire instruction + // INSTR_LOCK_RELEASE_REQ (0x2D) Core executes a lock release instruction + // EVENTS_CORE_PORT_RUNNING_1 (0x4F) + // EVENTS_CORE_PORT_RUNNING_0 (0x4B) + + + // Trace_Event0 (4 slots) + aiex.ipu.write32 { column = 2 : i32, row = 4 : i32, address = 0x340E0 : ui32, value = 0x4B222125 : ui32 } + // Trace_Event1 (4 slots) + aiex.ipu.write32 { column = 2 : i32, row = 4 : i32, address = 0x340E4 : ui32, value = 0x2D2C1A4F : ui32 } + + // Event slots as configured above: + // 0: Kernel executes vector instruction + // 1: Event 0 -- Kernel starts + // 2: Event 1 -- Kernel done + // 3: Port_Running_0 + // 4: Port_Running_1 + // 5: Lock Stall + // 6: Lock Acquire Instr + // 7: Lock Release Instr + + // Stream_Switch_Event_Port_Selection_0 + // This is necessary to capture the Port_Running_0 
and Port_Running_1 events + aiex.ipu.write32 { column = 2 : i32, row = 4 : i32, address = 0x3FF00 : ui32, value = 0x121 : ui32 } + + // Trace_Control0: Define trace start and stop triggers. Set start event TRUE. + aiex.ipu.write32 { column = 2 : i32, row = 4 : i32, address = 0x340D0 : ui32, value = 0x10000 : ui32 } + + // Start trace copy out. + aiex.ipu.writebd_shimtile { bd_id = 3 : i32, + buffer_length = 16384 : i32, + buffer_offset = 262144 : i32, + enable_packet = 0 : i32, + out_of_order_id = 0 : i32, + packet_id = 0 : i32, + packet_type = 0 : i32, + column = 0 : i32, + column_num = 1 : i32, + d0_stepsize = 0 : i32, + d0_size = 0 : i32, + d0_stride = 0 : i32, + d0_wrap = 0 : i32, + d1_stepsize = 0 : i32, + d1_wrap = 0 : i32, + d1_size = 0 : i32, + d1_stride = 0 : i32, + d2_stepsize = 0 : i32, + d2_size = 0 : i32, + d2_stride = 0 : i32, + ddr_id = 2 : i32, + iteration_current = 0 : i32, + iteration_stepsize = 0 : i32, + iteration_wrap = 0 : i32, + iteration_size = 0 : i32, + iteration_stride = 0 : i32, + lock_acq_enable = 0 : i32, + lock_acq_id = 0 : i32, + lock_acq_val = 0 : i32, + lock_rel_id = 0 : i32, + lock_rel_val = 0 : i32, + next_bd = 0 : i32, + use_next_bd = 0 : i32, + valid_bd = 1 : i32} + aiex.ipu.write32 { column = 0 : i32, row = 0 : i32, address = 0x1D20C : ui32, value = 0x3 : ui32 } + + //End trace dump + + + + aiex.ipu.rtp_write(0, 2, 0, 1) { buffer_sym_name = "rtp2" } + aiex.ipu.rtp_write(0, 3, 0, 1) { buffer_sym_name = "rtp3" } + aiex.ipu.rtp_write(0, 5, 0, 1) { buffer_sym_name = "rtp4" } + aiex.ipu.rtp_write(0, 4, 0, 1) { buffer_sym_name = "rtp5" } + aiex.ipu.rtp_write(0, 4, 1, 0) { buffer_sym_name = "rtp5" } + aiex.ipu.rtp_write(0, 4, 2, 1) { buffer_sym_name = "rtp5" } + + aiex.ipu.rtp_write(1, 5, 0, 1) { buffer_sym_name = "rtp15" } + aiex.ipu.rtp_write(1, 4, 0, 1) { buffer_sym_name = "rtp14" } + aiex.ipu.rtp_write(1, 2, 0, 1) { buffer_sym_name = "rtp12" } + aiex.ipu.rtp_write(1, 3, 0, 1) { buffer_sym_name = "rtp13" } + aiex.ipu.rtp_write(1, 
3, 1, 0) { buffer_sym_name = "rtp13" } + + aiex.ipu.rtp_write(2, 2, 0, 1) { buffer_sym_name = "rtp22" } + aiex.ipu.rtp_write(2, 3, 0, 1) { buffer_sym_name = "rtp23" } + aiex.ipu.rtp_write(2, 5, 0, 1) { buffer_sym_name = "rtp25" } + aiex.ipu.rtp_write(2, 4, 0, 1) { buffer_sym_name = "rtp24" } + aiex.ipu.rtp_write(2, 4, 1, 0) { buffer_sym_name = "rtp24" } + + %c0 = arith.constant 0 : i32 + %c1 = arith.constant 1 : i32 + %act_in= arith.constant 16384 : i64 + %act_out= arith.constant 65536 : i64 + %total_wts = arith.constant 18432 : i64 + %total_wts_2 = arith.constant 17408 : i64 + %total_wts_3 = arith.constant 17408 : i64 + %total_wts_3_off = arith.constant 35840 : i64 + + //dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words]) + aiex.ipu.dma_memcpy_nd(0, 0, %in0[0, 0, 0, 0][1, 1, 1, %act_in][0, 0, 0]) {id = 0 : i64, metadata = @inOF_act_L3L2} : memref<16384xi32> + aiex.ipu.dma_memcpy_nd(0, 0, %out[0, 0, 0, 0][1, 1, 1, %act_out][0, 0, 0]) {id = 2 : i64, metadata = @outOFL2L3} : memref<65536xi32> + aiex.ipu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, 0][1, 1, 1, %total_wts][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_0_L3L2} : memref<53248xi32> + aiex.ipu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, %total_wts][1, 1, 1, %total_wts_2][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_1_L3L2} : memref<53248xi32> + aiex.ipu.dma_memcpy_nd(0, 0, %wts0[0, 0, 0, %total_wts_3_off][1, 1, 1, %total_wts_3][0, 0, 0]) {id = 1 : i64, metadata = @inOF_wts_2_L3L2} : memref<53248xi32> + + aiex.ipu.sync {channel = 0 : i32, column = 1 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} + return + } + + } +} \ No newline at end of file diff --git a/programming_examples/ml/resnet/layers_conv2_x/aie2.py b/programming_examples/ml/resnet/layers_conv2_x/aie2.py new file mode 100755 index 0000000000..385a4fc7a5 --- /dev/null +++ b/programming_examples/ml/resnet/layers_conv2_x/aie2.py @@ -0,0 +1,639 @@ +# +# This file is licensed under the Apache License v2.0 
with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.extras.dialects.ext import memref, arith +from aie.extras.dialects.ext.scf import range_, yield_ +from aie.extras.context import mlir_mod_ctx +from aie.ir import MemRefType, TypeAttr + +import sys + +# tracing definitions +trace_sz_in_bytes = 8192 +trace_sz_in_i32s = trace_sz_in_bytes // 4 +enableTrace = False + +# Define bottleneck layer sizes + +tensorInW = 32 +tensorInH = 32 +tensorInC = 256 + +tensorL1InC = tensorInC +tensorL1OutC = tensorL1InC // 4 + +tensorL2InC = tensorL1OutC +tensorL2OutC = tensorL2InC + +tensorL3InC = tensorL2OutC +tensorL3OutC = tensorL3InC * 4 + + +def bottleneck4AIEs(): + with mlir_mod_ctx() as ctx: + + @device(AIEDevice.ipu) + def deviceBody(): + + # define types + uint8_ty = IntegerType.get_unsigned(8) + int8_ty = IntegerType.get_signless(8) + int16_ty = IntegerType.get_signless(16) + int32_ty = IntegerType.get_signless(32) + + tensorLayer1In_ty = MemRefType.get( + ( + tensorInW, + 1, + tensorL1InC, + ), + int8_ty, + ) + weightsLayer1_ty = MemRefType.get((tensorL1InC * tensorL1OutC,), int8_ty) + tensorLayer1Out_ty = MemRefType.get( + ( + tensorInW, + 1, + tensorL1OutC, + ), + uint8_ty, + ) + + tensorLayer2In_ty = MemRefType.get( + ( + tensorInW, + 1, + tensorL2InC, + ), + uint8_ty, + ) + weightsLayer2_ty = MemRefType.get( + (3 * 3 * tensorL2InC * tensorL2OutC,), int8_ty + ) + tensorLayer2Out_ty = MemRefType.get( + ( + tensorInW, + 1, + tensorL2OutC // 2, + ), + uint8_ty, + ) + + tensorLayer3In_ty = MemRefType.get( + ( + tensorInW, + 1, + tensorL3InC // 2, + ), + uint8_ty, + ) + weightsLayer3_ty = MemRefType.get((tensorL3InC * tensorL3OutC,), int8_ty) + tensorLayer3Out_ty = MemRefType.get( + ( + tensorInW, + 1, + tensorL3OutC, + ), + uint8_ty, + ) + + allWeights_ty = 
MemRefType.get( + ( + tensorL1InC * tensorL1OutC + + 3 * 3 * tensorL2InC * tensorL2OutC + + tensorL3InC * tensorL3OutC, + ), + int8_ty, + ) + + # kernel definitions + conv2dk1 = external_func( + "conv2dk1_i8", + inputs=[ + tensorLayer1In_ty, + weightsLayer1_ty, + tensorLayer1Out_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + ], + ) + conv2dk3 = external_func( + "conv2dk3_ui8", + inputs=[ + tensorLayer2In_ty, + tensorLayer2In_ty, + tensorLayer2In_ty, + weightsLayer2_ty, + tensorLayer2Out_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + ], + ) + conv2dk1_skip = external_func( + "conv2dk1_skip_i8", + inputs=[ + tensorLayer3In_ty, + tensorLayer3In_ty, + weightsLayer3_ty, + tensorLayer3Out_ty, + tensorLayer1In_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + int32_ty, + ], + ) + + ShimTile = tile(0, 0) + MemTile = tile(0, 1) + ComputeTile2 = tile(0, 2) + ComputeTile3 = tile(0, 3) + ComputeTile4 = tile(0, 4) + ComputeTile5 = tile(0, 5) + + if enableTrace: + flow(ComputeTile4, WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1) + + # runtime parameters + + rtpComputeTile2 = Buffer(ComputeTile2, [16], T.i32(), "rtpComputeTile2") + rtpComputeTile3 = Buffer(ComputeTile3, [16], T.i32(), "rtpComputeTile3") + rtpComputeTile4 = Buffer(ComputeTile4, [16], T.i32(), "rtpComputeTile4") + rtpComputeTile5 = Buffer(ComputeTile5, [16], T.i32(), "rtpComputeTile5") + + # set up data movement with OFs + # input tensor (with broadcast for skip connection) + of_inOF_act_L3L2 = object_fifo( + "inOF_act_L3L2", + ShimTile, + [ComputeTile2, MemTile], + [2, 2, 4], + tensorLayer1In_ty, + ) + of_skip_buf = object_fifo( + "skip_buf", MemTile, ComputeTile4, 2, tensorLayer1In_ty + ) + object_fifo_link(of_inOF_act_L3L2, of_skip_buf) + + # weights + inOF_wts_0_L3L2 = object_fifo( + "inOF_wts_0_L3L2", ShimTile, MemTile, 1, allWeights_ty + ) + of_wts_buf_00 = object_fifo( + "wts_buf_00", MemTile, ComputeTile2, 1, weightsLayer1_ty + ) + 
wts_buf_01 = object_fifo( + "wts_buf_01", + MemTile, + [ComputeTile3, ComputeTile5], + 1, + weightsLayer2_ty, + ) + wts_buf_02 = object_fifo( + "wts_buf_02", MemTile, ComputeTile4, 1, weightsLayer3_ty + ) + object_fifo_link(inOF_wts_0_L3L2, [of_wts_buf_00, wts_buf_01, wts_buf_02]) + + # activation tensor + of_act_2_3_5 = object_fifo( + "act_2_3_5", + ComputeTile2, + [ComputeTile3, ComputeTile5], + [2, 4, 4], + tensorLayer1Out_ty, + ) # 1x1 -> 3x3 + act_3_4 = object_fifo( + "act_3_4", ComputeTile3, ComputeTile4, 2, tensorLayer2Out_ty + ) # 3x3 -> 1x1 + act_5_4 = object_fifo( + "act_5_4", ComputeTile5, ComputeTile4, 2, tensorLayer2Out_ty + ) # 3x3 -> 1x1 + + # output tensor + outOFL2L3 = object_fifo( + "outOFL2L3", ComputeTile4, ShimTile, 2, tensorLayer3Out_ty + ) + + # 1x1 conv2d + @core(ComputeTile2, "conv2dk1.o") + def core_body(): + for _ in range_(sys.maxsize): + + # acquire weights once + element0Weights = of_wts_buf_00.acquire(ObjectFifoPort.Consume, 1) + scale = memref.load(rtpComputeTile2, [0]) + for _ in range_(tensorInH): + element0ActivactionsIn = of_inOF_act_L3L2.acquire( + ObjectFifoPort.Consume, 1 + ) + element0ActivactionsOut = of_act_2_3_5.acquire( + ObjectFifoPort.Produce, 1 + ) + res = call( + conv2dk1, + [ + element0ActivactionsIn, + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL1InC, + tensorL1OutC, + scale, + ], + ) + + objectfifo_release(ObjectFifoPort.Consume, "inOF_act_L3L2", 1) + + objectfifo_release(ObjectFifoPort.Produce, "act_2_3_5", 1) + yield_([]) + objectfifo_release(ObjectFifoPort.Consume, "wts_buf_00", 1) + yield_([]) + + # 3x3 conv2d OFM 0-31 + @core(ComputeTile3, "conv2dk3.o") + def core_body(): + scale = 11 + for _ in range_(sys.maxsize): + + # acquire weights and rtps once + element0Weights = wts_buf_01.acquire(ObjectFifoPort.Consume, 1) + # scale = memref.load(rtpComputeTile3, 0) + + # pre-amble: top row + elementActivactionsIn = of_act_2_3_5.acquire( + ObjectFifoPort.Consume, 2 + ) + element0ActivactionsOut 
= act_3_4.acquire(ObjectFifoPort.Produce, 1) + res = call( + conv2dk3, + [ + elementActivactionsIn[0], + elementActivactionsIn[0], + elementActivactionsIn[1], + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL2InC, + tensorL2OutC, + 3, + 3, + 0, + scale, + 0, + ], + ) + objectfifo_release(ObjectFifoPort.Produce, "act_3_4", 1) + + # middle + for _ in range_(tensorInH - 2): + elementActivactionsIn = of_act_2_3_5.acquire( + ObjectFifoPort.Consume, 3 + ) + element0ActivactionsOut = act_3_4.acquire( + ObjectFifoPort.Produce, 1 + ) + res = call( + conv2dk3, + [ + elementActivactionsIn[0], + elementActivactionsIn[1], + elementActivactionsIn[2], + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL2InC, + tensorL2OutC, + 3, + 3, + 1, + scale, + 0, + ], + ) + + objectfifo_release(ObjectFifoPort.Consume, "act_2_3_5", 1) + objectfifo_release(ObjectFifoPort.Produce, "act_3_4", 1) + yield_([]) + + # last part + elementActivactionsIn = of_act_2_3_5.acquire( + ObjectFifoPort.Consume, 2 + ) + element0ActivactionsOut = act_3_4.acquire(ObjectFifoPort.Produce, 1) + res = call( + conv2dk3, + [ + elementActivactionsIn[0], + elementActivactionsIn[1], + elementActivactionsIn[1], + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL2InC, + tensorL2OutC, + 3, + 3, + 2, + scale, + 0, + ], + ) + + objectfifo_release(ObjectFifoPort.Consume, "act_2_3_5", 2) + objectfifo_release(ObjectFifoPort.Produce, "act_3_4", 1) + + objectfifo_release(ObjectFifoPort.Consume, "wts_buf_01", 1) + yield_([]) + + # 3x3 conv2d OFM 32-63 + @core(ComputeTile5, "conv2dk3.o") + def core_body(): + scale = 11 + for _ in range_(sys.maxsize): + + # acquire weights and rtps once + element0Weights = wts_buf_01.acquire(ObjectFifoPort.Consume, 1) + # scale = memref.load(rtpComputeTile5, 0) + + # pre-amble: top row + elementActivactionsIn = of_act_2_3_5.acquire( + ObjectFifoPort.Consume, 2 + ) + element0ActivactionsOut = act_5_4.acquire(ObjectFifoPort.Produce, 1) + res = call( + 
conv2dk3, + [ + elementActivactionsIn[0], + elementActivactionsIn[0], + elementActivactionsIn[1], + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL2InC, + tensorL2OutC, + 3, + 3, + 0, + scale, + tensorL2OutC // 2, + ], + ) + + objectfifo_release(ObjectFifoPort.Produce, "act_5_4", 1) + + # middle + for _ in range_(tensorInH - 2): + elementActivactionsIn = of_act_2_3_5.acquire( + ObjectFifoPort.Consume, 3 + ) + element0ActivactionsOut = act_5_4.acquire( + ObjectFifoPort.Produce, 1 + ) + res = call( + conv2dk3, + [ + elementActivactionsIn[0], + elementActivactionsIn[1], + elementActivactionsIn[2], + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL2InC, + tensorL2OutC, + 3, + 3, + 1, + scale, + tensorL2OutC // 2, + ], + ) + + objectfifo_release(ObjectFifoPort.Consume, "act_2_3_5", 1) + objectfifo_release(ObjectFifoPort.Produce, "act_5_4", 1) + yield_([]) + + # last part + elementActivactionsIn = of_act_2_3_5.acquire( + ObjectFifoPort.Consume, 2 + ) + element0ActivactionsOut = act_5_4.acquire(ObjectFifoPort.Produce, 1) + res = call( + conv2dk3, + [ + elementActivactionsIn[0], + elementActivactionsIn[1], + elementActivactionsIn[1], + element0Weights, + element0ActivactionsOut, + tensorInW, + tensorL2InC, + tensorL2OutC, + 3, + 3, + 2, + scale, + tensorL2OutC // 2, + ], + ) + objectfifo_release(ObjectFifoPort.Consume, "act_2_3_5", 2) + objectfifo_release(ObjectFifoPort.Produce, "act_5_4", 1) + objectfifo_release(ObjectFifoPort.Consume, "wts_buf_01", 1) + yield_([]) + + # # 1x1 conv2d and add skip + @core(ComputeTile4, "conv2dk1_skip.o") + def core_body(): + for _ in range_(sys.maxsize): + + # acquire weights and rtps once + element0Weights = wts_buf_02.acquire(ObjectFifoPort.Consume, 1) + scale = memref.load(rtpComputeTile4, [0]) + skipScale = memref.load(rtpComputeTile4, [1]) + + for _ in range_(tensorInH): + element0ActivactionsIn = act_3_4.acquire( + ObjectFifoPort.Consume, 1 + ) + element1ActivactionsIn = act_5_4.acquire( + 
ObjectFifoPort.Consume, 1 + ) + elementSkipsIn = of_skip_buf.acquire(ObjectFifoPort.Consume, 1) + elementActivactionsOut = outOFL2L3.acquire( + ObjectFifoPort.Produce, 1 + ) + + call( + conv2dk1_skip, + [ + element0ActivactionsIn, + element1ActivactionsIn, + element0Weights, + elementActivactionsOut, + elementSkipsIn, + tensorInW, + tensorL3InC, + tensorL3OutC, + scale, + skipScale, + ], + ) + objectfifo_release(ObjectFifoPort.Produce, "outOFL2L3", 1) + objectfifo_release(ObjectFifoPort.Consume, "act_3_4", 1) + objectfifo_release(ObjectFifoPort.Consume, "act_5_4", 1) + objectfifo_release(ObjectFifoPort.Consume, "skip_buf", 1) + yield_([]) + objectfifo_release(ObjectFifoPort.Consume, "wts_buf_02", 1) + yield_([]) + + # instruction stream generation + activationsInSize32b = (tensorInW * tensorInH * tensorInC) // 4 + acitivationsOutSize32b = activationsInSize32b + totalWeightsSize32b = ( + tensorL1InC * tensorL1OutC + + 3 * 3 * tensorL2InC * tensorL2OutC + + tensorL3InC * tensorL3OutC + ) // 4 + + activationsInL3_ty = MemRefType.get((activationsInSize32b,), int32_ty) + weightsInL3_ty = MemRefType.get((totalWeightsSize32b,), int32_ty) + + @FuncOp.from_py_func(activationsInL3_ty, weightsInL3_ty, activationsInL3_ty) + def sequence(inputFromL3, weightsFromL3, outputToL3): + + if enableTrace: + # Trace output + + # Trace_Event0, Trace_Event1: Select which events to trace. + # Note that the event buffers only appear to be transferred to DDR in + # bursts of 256 bytes. If less than 256 bytes are written, you may not + # see trace output, or only see it on the next iteration of your + # kernel invocation, as the buffer gets filled up. Note that, even + # though events are encoded as 4 byte words, it may take more than 64 + # events to fill the buffer to 256 bytes and cause a flush, since + # multiple repeating events can be 'compressed' by the trace mechanism. 
+ # In order to always generate sufficient events, we add the "assert + # TRUE" event to one slot, which fires every cycle, and thus fills our + # buffer quickly. + + # Some events: + # TRUE (0x01) + # STREAM_STALL (0x18) + # LOCK_STALL (0x1A) + # EVENTS_CORE_INSTR_EVENT_1 (0x22) + # EVENTS_CORE_INSTR_EVENT_0 (0x21) + # INSTR_VECTOR (0x25) Core executes a vecotr MAC, ADD or compare instruction + # INSTR_LOCK_ACQUIRE_REQ (0x2C) Core executes a lock .acquire instruction + # INSTR_LOCK_.release_REQ (0x2D) Core executes a lock .release instruction + # EVENTS_CORE_PORT_RUNNING_1 (0x4F) + # EVENTS_CORE_PORT_RUNNING_0 (0x4B) + + # Trace_Event0 (4 slots) + ipu_write32(0, 4, 0x340E0, 0x4B222125) + # Trace_Event1 (4 slots) + ipu_write32(0, 4, 0x340E4, 0x2D2C1A4F) + + # Event slots as configured above: + # 0: Kernel executes vector instruction + # 1: Event 0 -- Kernel starts + # 2: Event 1 -- Kernel done + # 3: Port_Running_0 + # 4: Port_Running_1 + # 5: Lock Stall + # 6: Lock .acquire Instr + # 7: Lock .release Instr + + # Stream_Switch_Event_Port_Selection_0 + # This is necessary to capture the Port_Running_0 and Port_Running_1 events + ipu_write32(0, 4, 0x3FF00, 0x121) + + # Trace_Control0: Define trace start and stop triggers. Set start event TRUE. + ipu_write32(0, 4, 0x340D0, 0x10000) + + # Start trace copy out. 
+ ipu_writebd_shimtile( + bd_id=3, + buffer_length=trace_sz_in_i32s, + buffer_offset=acitivationsOutSize32b, + enable_packet=0, + out_of_order_id=0, + packet_id=0, + packet_type=0, + column=0, + column_num=1, + d0_stepsize=0, + d0_wrap=0, + d1_stepsize=0, + d1_wrap=0, + d2_stepsize=0, + ddr_id=2, + iteration_current=0, + iteration_stepsize=0, + iteration_wrap=0, + lock_acq_enable=0, + lock_acq_id=0, + lock_acq_val=0, + lock_rel_id=0, + lock_rel_val=0, + next_bd=0, + use_next_bd=0, + valid_bd=1, + ) + ipu_write32(0, 2, 0x1D20C, 0x3) + + # write RTP parameters + IpuWriteRTPOp( + "rtpComputeTile2", col=0, row=2, index=0, value=1 + ) # scale + IpuWriteRTPOp( + "rtpComputeTile3", col=0, row=3, index=0, value=1 + ) # scale + IpuWriteRTPOp( + "rtpComputeTile5", col=0, row=5, index=0, value=1 + ) # scale + IpuWriteRTPOp( + "rtpComputeTile4", col=0, row=4, index=0, value=1 + ) # scale: conv1x1 with the same scale as the input so we match the scaling factor of output after conv1x1 and the initial input + IpuWriteRTPOp( + "rtpComputeTile4", col=0, row=4, index=1, value=0 + ) # skip_scale + + ipu_dma_memcpy_nd( + metadata="inOF_act_L3L2", + bd_id=0, + mem=inputFromL3, + sizes=[1, 1, 1, activationsInSize32b], + ) + ipu_dma_memcpy_nd( + metadata="outOFL2L3", + bd_id=2, + mem=outputToL3, + sizes=[1, 1, 1, acitivationsOutSize32b], + ) + ipu_dma_memcpy_nd( + metadata="inOF_wts_0_L3L2", + bd_id=1, + mem=weightsFromL3, + sizes=[1, 1, 1, totalWeightsSize32b], + ) + + ipu_sync(column=0, row=0, direction=0, channel=0) + + print(ctx.module) + + +bottleneck4AIEs() diff --git a/programming_examples/ml/resnet/layers_conv2_x/requirements.txt b/programming_examples/ml/resnet/layers_conv2_x/requirements.txt new file mode 100755 index 0000000000..08ed5eeb4b --- /dev/null +++ b/programming_examples/ml/resnet/layers_conv2_x/requirements.txt @@ -0,0 +1 @@ +torch \ No newline at end of file diff --git a/programming_examples/ml/resnet/layers_conv2_x/run.lit 
b/programming_examples/ml/resnet/layers_conv2_x/run.lit new file mode 100755 index 0000000000..61f43e45e6 --- /dev/null +++ b/programming_examples/ml/resnet/layers_conv2_x/run.lit @@ -0,0 +1,14 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, chess, torch +// +// RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DINT8_ACT -c %S/../../../../aie_kernels/aie2/conv2dk1.cc -o conv2dk1_i8.o +// RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DUINT8_ACT -c %S/../../../../aie_kernels/aie2/conv2dk3.cc -o conv2dk3.o +// RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DINT8_ACT -c %S/../../../../aie_kernels/aie2/conv2dk1_skip_init.cc -o conv2dk1_skip_init.o +// RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DSCALAR -DUINT8_ACT -c %S/../../../../aie_kernels/aie2/conv2dk1.cc -o conv2dk1_ui8.o +// RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DSCALAR -DUINT8_ACT -c %S/../../../../aie_kernels/aie2/conv2dk1_skip.cc -o conv2dk1_skip.o +// RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir +// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir +// RUN: %run_on_ipu %python %S/test.py | FileCheck %s +// CHECK: PASS! \ No newline at end of file diff --git a/programming_examples/ml/resnet/layers_conv2_x/test.py b/programming_examples/ml/resnet/layers_conv2_x/test.py new file mode 100755 index 0000000000..02dc01b127 --- /dev/null +++ b/programming_examples/ml/resnet/layers_conv2_x/test.py @@ -0,0 +1,436 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. 
"""End-to-end NPU test for the resnet conv2_x design (three bottleneck blocks).

Builds an int8-quantized golden reference in PyTorch, reorders activations and
weights into the AIE tiled memory layout, executes the xclbin on the NPU, and
compares the NPU output against the golden reference.
"""

import torch
import torch.nn as nn
import sys
import math
from aie.utils.ml import DataShaper
import time
import os
import numpy as np
from aie.utils.xrt import setup_aie, extract_trace, write_out_trace, execute

# Deterministic run so the golden reference and NPU inputs are reproducible.
torch.use_deterministic_algorithms(True)
torch.manual_seed(0)

design = "resnet_conv2_x_int8"
xclbin_path = os.path.abspath("build/final.xclbin")
insts_path = os.path.abspath("build/insts.txt")

log_folder = "log/"
if not os.path.exists(log_folder):
    os.makedirs(log_folder)

num_iter = 1
npu_time_total = 0
npu_time_min = 9999999
npu_time_max = 0
trace_size = 16384
enable_trace = False
trace_file = "log/trace_" + design + ".txt"
# ------------------------------------------------------
# Configure this to match your design's buffer size
# ------------------------------------------------------
dtype_in = np.dtype("int8")
dtype_wts = np.dtype("int8")
dtype_out = np.dtype("uint8")

# Tiled activation layout (Y, C/8, X, C8) — assumes 64 input channels on a
# 32x32 image; TODO confirm against the aie2.py design.
shape_in_act = (32, 8, 32, 8)
# Flattened weights for ALL THREE bottleneck blocks:
# block0 (incl. skip conv) 73728 + block1 69632 + block2 69632 = 212992 bytes.
shape_total_wts = (212992, 1)
shape_out = (32, 32, 32, 8)

# ------------------------------------------------------
# Initialize activation, weights, scaling factor for int8 model
# ------------------------------------------------------
int_inp = torch.randint(1, 10, (1, 64, 32, 32)).type(torch.FloatTensor)
block_0_int_weight_1 = torch.randint(10, 20, (64, 64, 1, 1)).type(torch.FloatTensor)
block_0_int_weight_2 = torch.randint(10, 20, (64, 64, 3, 3)).type(torch.FloatTensor)
block_0_int_weight_3 = torch.randint(10, 20, (256, 64, 1, 1)).type(torch.FloatTensor)
block_0_int_weight_skip = torch.randint(10, 20, (256, 64, 1, 1)).type(torch.FloatTensor)

block_1_int_weight_1 = torch.randint(20, 30, (64, 256, 1, 1)).type(torch.FloatTensor)
block_1_int_weight_2 = torch.randint(20, 30, (64, 64, 3, 3)).type(torch.FloatTensor)
block_1_int_weight_3 = torch.randint(20, 30, (256, 64, 1, 1)).type(torch.FloatTensor)

block_2_int_weight_1 = torch.randint(30, 40, (64, 256, 1, 1)).type(torch.FloatTensor)
block_2_int_weight_2 = torch.randint(30, 40, (64, 64, 3, 3)).type(torch.FloatTensor)
block_2_int_weight_3 = torch.randint(30, 40, (256, 64, 1, 1)).type(torch.FloatTensor)

# Per-tensor quantization scales (all 0.5 for this synthetic test).
init_scale = 0.5
block_0_relu_1 = 0.5
block_0_relu_2 = 0.5
block_0_relu_3 = 0.5

block_0_weight_scale1 = 0.5
block_0_weight_scale2 = 0.5
block_0_weight_scale3 = 0.5
block_0_weight_scale_skip = 0.5

block_1_relu_1 = 0.5
block_1_relu_2 = 0.5
block_1_relu_3 = 0.5

block_1_weight_scale1 = 0.5
block_1_weight_scale2 = 0.5
block_1_weight_scale3 = 0.5
block_1_quant_add_1 = 0.5

block_2_relu_1 = 0.5
block_2_relu_2 = 0.5
block_2_relu_3 = 0.5

block_2_weight_scale1 = 0.5
block_2_weight_scale2 = 0.5
block_2_weight_scale3 = 0.5
block_2_quant_add_1 = 0.5

# Combined (folded) scales as power-of-two shift amounts for the AIE kernels.
block_0_combined_scale1 = -math.log2(
    init_scale * block_0_weight_scale1 / block_0_relu_1
)  # RHS after first conv1x1 | clip 0-->255
block_0_combined_scale2 = -math.log2(
    block_0_relu_1 * block_0_weight_scale2 / block_0_relu_2
)  # RHS after second conv3x3 | clip 0-->255
block_0_combined_scale3 = -math.log2(
    block_0_relu_2 * block_0_weight_scale3 / init_scale
)  # RHS after third conv1x1 | clip -128-->+127
block_0_combined_scale_skip = -math.log2(
    init_scale * block_0_weight_scale_skip / init_scale
)  # LHS after conv1x1 | clip -128-->+127
block_0_combined_scale4 = -math.log2(
    init_scale / block_0_relu_3
)  # After addition | clip 0-->255

block_1_combined_scale1 = -math.log2(
    block_0_relu_3 * block_1_weight_scale1 / block_1_relu_1
)  # RHS after first conv1x1 | clip 0-->255
block_1_combined_scale2 = -math.log2(
    block_1_relu_1 * block_1_weight_scale2 / block_1_relu_2
)  # RHS after second conv3x3 | clip 0-->255
block_1_combined_scale3 = -math.log2(
    block_1_relu_2 * block_1_weight_scale3 / block_1_quant_add_1
)  # RHS after third conv1x1 | clip -128-->+127
block_1_combined_scale4 = -math.log2(
    block_1_quant_add_1 / block_1_relu_3
)  # After addition | clip 0-->255

block_2_combined_scale1 = -math.log2(
    block_1_relu_3 * block_2_weight_scale1 / block_2_relu_1
)  # RHS after first conv1x1 | clip 0-->255
block_2_combined_scale2 = -math.log2(
    block_2_relu_1 * block_2_weight_scale2 / block_2_relu_2
)  # RHS after second conv3x3 | clip 0-->255
block_2_combined_scale3 = -math.log2(
    block_2_relu_2 * block_2_weight_scale3 / block_2_quant_add_1
)  # RHS after third conv1x1 | clip -128-->+127
block_2_combined_scale4 = -math.log2(
    block_2_quant_add_1 / block_2_relu_3
)  # After addition | clip 0-->255

# uint8 clipping range for ReLU outputs (renamed so the builtins min/max are
# not shadowed).
clip_min = 0
clip_max = 255

# ------------------------------------------------------
# Get device, load the xclbin & kernel and register them
# ------------------------------------------------------
app = setup_aie(
    xclbin_path,
    insts_path,
    shape_in_act,
    dtype_in,
    shape_total_wts,
    dtype_wts,
    shape_out,
    dtype_out,
    enable_trace=enable_trace,
    trace_size=trace_size,
)


# ------------------------------------------------------
# Define your golden reference
# ------------------------------------------------------
class resnet_conv2_x_int8(nn.Module):
    """Float emulation of the int8 conv2_x stage: three bottlenecks.

    Bottleneck 0 carries a 1x1 projection shortcut; bottlenecks 1 and 2 use
    identity shortcuts. Quantization is emulated by round/clamp at each stage
    using the module-level scale constants.
    """

    expansion = 4

    def __init__(self, in_planes=64, planes=64):
        super(resnet_conv2_x_int8, self).__init__()

        # Projection shortcut for bottleneck 0 (channel expansion 64 -> 256).
        self.shortcut = nn.Conv2d(
            in_planes, self.expansion * planes, kernel_size=1, bias=False
        )
        # Bottleneck 0
        self.block_0_conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.block_0_conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, padding=1, padding_mode="zeros", bias=False
        )
        self.block_0_conv3 = nn.Conv2d(
            planes, self.expansion * planes, kernel_size=1, bias=False
        )

        self.block_0_relu1 = nn.ReLU()
        self.block_0_relu2 = nn.ReLU()
        self.block_0_relu3 = nn.ReLU()

        # Bottleneck 1
        self.block_1_conv1 = nn.Conv2d(
            self.expansion * planes, planes, kernel_size=1, bias=False
        )
        self.block_1_conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, padding=1, padding_mode="zeros", bias=False
        )
        self.block_1_conv3 = nn.Conv2d(
            planes, self.expansion * planes, kernel_size=1, bias=False
        )

        self.block_1_relu1 = nn.ReLU()
        self.block_1_relu2 = nn.ReLU()
        self.block_1_relu3 = nn.ReLU()

        # Bottleneck 2
        self.block_2_conv1 = nn.Conv2d(
            self.expansion * planes, planes, kernel_size=1, bias=False
        )
        self.block_2_conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, padding=1, padding_mode="zeros", bias=False
        )
        self.block_2_conv3 = nn.Conv2d(
            planes, self.expansion * planes, kernel_size=1, bias=False
        )

        self.block_2_relu1 = nn.ReLU()
        self.block_2_relu2 = nn.ReLU()
        self.block_2_relu3 = nn.ReLU()

    def forward(self, x):
        # **************** Bottleneck 0 ****************
        block_0_conv1_out = self.block_0_conv1(x) * init_scale * block_0_weight_scale1
        block_0_relu1_out = torch.clamp(
            torch.round(self.block_0_relu1(block_0_conv1_out) / block_0_relu_1),
            clip_min,
            clip_max,
        )  # convert to int and apply relu
        block_0_conv2_out = (
            self.block_0_conv2(block_0_relu1_out)
            * block_0_relu_1
            * block_0_weight_scale2
        )
        block_0_relu2_out = torch.clamp(
            torch.round(self.block_0_relu2(block_0_conv2_out) / block_0_relu_2),
            clip_min,
            clip_max,
        )
        block_0_conv3_out = (
            self.block_0_conv3(block_0_relu2_out)
            * block_0_relu_2
            * block_0_weight_scale3
        )
        block_0_rhf_same_scale = torch.clamp(
            torch.round(block_0_conv3_out / init_scale), -128, 127
        )

        block_0_lhs_conv = self.shortcut(x) * init_scale * block_0_weight_scale_skip
        block_0_lhs_same_scale = torch.clamp(
            torch.round(block_0_lhs_conv / init_scale), -128, 127
        )
        # convert to int and apply relu

        block_0_skip_add = init_scale * (
            block_0_rhf_same_scale + block_0_lhs_same_scale
        )
        block_0_final_out = torch.clamp(
            torch.round(self.block_0_relu3(block_0_skip_add) / block_0_relu_3),
            clip_min,
            clip_max,
        )
        # **************** Bottleneck 1 ****************
        block_1_conv1_out = (
            self.block_1_conv1(block_0_final_out)
            * block_0_relu_3
            * block_1_weight_scale1
        )
        block_1_relu1_out = torch.clamp(
            torch.round(self.block_1_relu1(block_1_conv1_out) / block_1_relu_1),
            clip_min,
            clip_max,
        )  # convert to int and apply relu
        block_1_conv2_out = (
            self.block_1_conv2(block_1_relu1_out)
            * block_1_relu_1
            * block_1_weight_scale2
        )
        block_1_relu2_out = torch.clamp(
            torch.round(self.block_1_relu2(block_1_conv2_out) / block_1_relu_2),
            clip_min,
            clip_max,
        )
        block_1_conv3_out = (
            self.block_1_conv3(block_1_relu2_out)
            * block_1_relu_2
            * block_1_weight_scale3
        )
        block_1_rhf_same_scale = torch.clamp(
            torch.round(block_1_conv3_out / block_0_relu_3), -128, 127
        )

        # Identity shortcut: add the previous block's output directly.
        block_1_skip_add = block_0_relu_3 * (block_1_rhf_same_scale + block_0_final_out)
        block_1_final_out = torch.clamp(
            torch.round(self.block_1_relu3(block_1_skip_add) / block_1_relu_3),
            clip_min,
            clip_max,
        )

        # **************** Bottleneck 2 ****************
        block_2_conv1_out = (
            self.block_2_conv1(block_1_final_out)
            * block_1_relu_3
            * block_2_weight_scale1
        )
        block_2_relu1_out = torch.clamp(
            torch.round(self.block_2_relu1(block_2_conv1_out) / block_2_relu_1),
            clip_min,
            clip_max,
        )  # convert to int and apply relu
        block_2_conv2_out = (
            self.block_2_conv2(block_2_relu1_out)
            * block_2_relu_1
            * block_2_weight_scale2
        )
        block_2_relu2_out = torch.clamp(
            torch.round(self.block_2_relu2(block_2_conv2_out) / block_2_relu_2),
            clip_min,
            clip_max,
        )
        block_2_conv3_out = (
            self.block_2_conv3(block_2_relu2_out)
            * block_2_relu_2
            * block_2_weight_scale3
        )
        block_2_rhf_same_scale = torch.clamp(
            torch.round(block_2_conv3_out / block_1_relu_3), -128, 127
        )

        block_2_skip_add = block_1_relu_3 * (block_2_rhf_same_scale + block_1_final_out)
        # Final output is rescaled back to float by block_2_relu_3 so it can be
        # compared against the (rescaled) NPU output below.
        block_2_final_out = block_2_relu_3 * (
            torch.clamp(
                torch.round(self.block_2_relu3(block_2_skip_add) / block_2_relu_3),
                clip_min,
                clip_max,
            )
        )
        return block_2_final_out


# ------------------------------------------------------
# Pytorch baseline
# ------------------------------------------------------
model = resnet_conv2_x_int8()
model.eval()
model.block_0_conv1.weight.data.copy_(block_0_int_weight_1)
model.block_0_conv2.weight.data.copy_(block_0_int_weight_2)
model.block_0_conv3.weight.data.copy_(block_0_int_weight_3)
model.shortcut.weight.data.copy_(block_0_int_weight_skip)

model.block_1_conv1.weight.data.copy_(block_1_int_weight_1)
model.block_1_conv2.weight.data.copy_(block_1_int_weight_2)
model.block_1_conv3.weight.data.copy_(block_1_int_weight_3)

model.block_2_conv1.weight.data.copy_(block_2_int_weight_1)
model.block_2_conv2.weight.data.copy_(block_2_int_weight_2)
model.block_2_conv3.weight.data.copy_(block_2_int_weight_3)

golden_output = model(int_inp)

# ------------------------------------------------------
# Reorder input data-layout
# ------------------------------------------------------
ds = DataShaper()
before_input = int_inp.squeeze().data.numpy().astype(dtype_in)
before_input.tofile(log_folder + "/before_ifm_mem_fmt_1x1.txt", sep=",", format="%d")
ifm_mem_fmt = ds.reorder_mat(before_input, "YCXC8", "CYX")
ifm_mem_fmt.tofile(log_folder + "/after_ifm_mem_fmt_1x1.txt", sep=",", format="%d")

# Weights are tiled per 8 input / 8 output channels ("OIYXI8O8") and
# concatenated in the order the design's DMA streams them in.
block0_wts1 = ds.reorder_mat(
    block_0_int_weight_1.data.numpy().astype(dtype_wts), "OIYXI8O8", "OIYX"
)
block0_wts2 = ds.reorder_mat(
    block_0_int_weight_2.data.numpy().astype(dtype_wts), "OIYXI8O8", "OIYX"
)
block0_wts3 = ds.reorder_mat(
    block_0_int_weight_3.data.numpy().astype(dtype_wts), "OIYXI8O8", "OIYX"
)
block0_wts_skip = ds.reorder_mat(
    block_0_int_weight_skip.data.numpy().astype(dtype_wts), "OIYXI8O8", "OIYX"
)

total_wts = np.concatenate(
    (block0_wts1, block0_wts2, block0_wts3, block0_wts_skip), axis=None
)

block1_wts1 = ds.reorder_mat(
    block_1_int_weight_1.data.numpy().astype(dtype_wts), "OIYXI8O8", "OIYX"
)
block1_wts2 = ds.reorder_mat(
    block_1_int_weight_2.data.numpy().astype(dtype_wts), "OIYXI8O8", "OIYX"
)
block1_wts3 = ds.reorder_mat(
    block_1_int_weight_3.data.numpy().astype(dtype_wts), "OIYXI8O8", "OIYX"
)

total_wts2 = np.concatenate(
    (total_wts, block1_wts1, block1_wts2, block1_wts3), axis=None
)

block2_wts1 = ds.reorder_mat(
    block_2_int_weight_1.data.numpy().astype(dtype_wts), "OIYXI8O8", "OIYX"
)
block2_wts2 = ds.reorder_mat(
    block_2_int_weight_2.data.numpy().astype(dtype_wts), "OIYXI8O8", "OIYX"
)
block2_wts3 = ds.reorder_mat(
    block_2_int_weight_3.data.numpy().astype(dtype_wts), "OIYXI8O8", "OIYX"
)

total_wts3 = np.concatenate(
    (total_wts2, block2_wts1, block2_wts2, block2_wts3), axis=None
)

total_wts3.tofile(log_folder + "/weights_mem_fmt_final.txt", sep=",", format="%d")

# ------------------------------------------------------
# Main run loop
# ------------------------------------------------------
for i in range(num_iter):
    start = time.time_ns()
    # BUGFIX: pass total_wts3 (all three blocks' weights, 212992 bytes matching
    # shape_total_wts), not total_wts, which holds only block 0's weights.
    aie_output = execute(app, ifm_mem_fmt, total_wts3) * block_2_relu_3
    stop = time.time_ns()

    if enable_trace:
        aie_output, trace = extract_trace(aie_output, shape_out, dtype_out, trace_size)
        write_out_trace(trace, trace_file)

    npu_time = stop - start
    npu_time_total = npu_time_total + npu_time
    # Track per-iteration extremes (previously initialized but never updated).
    npu_time_min = npu_time if npu_time < npu_time_min else npu_time_min
    npu_time_max = npu_time if npu_time > npu_time_max else npu_time_max

# ------------------------------------------------------
# Reorder output data-layout
# ------------------------------------------------------
temp_out = aie_output.reshape(32, 32, 32, 8)
temp_out = ds.reorder_mat(temp_out, "CDYX", "YCXD")
ofm_mem_fmt = temp_out.reshape(256, 32, 32)
ofm_mem_fmt.tofile(log_folder + "/after_ofm_mem_fmt_final.txt", sep=",", format="%d")
ofm_mem_fmt_out = torch.from_numpy(ofm_mem_fmt).unsqueeze(0)

# ------------------------------------------------------
# Compare the AIE output and the golden reference
# ------------------------------------------------------
print("\nAvg NPU time: {}us.".format(int((npu_time_total / num_iter) / 1000)))

# Tolerance of one output quantization step (block_2_relu_3).
assert np.allclose(
    ofm_mem_fmt_out.detach().numpy(),
    golden_output.detach().numpy(),
    rtol=0,
    atol=block_2_relu_3,
)

print("\nPASS!\n")