From 5afc44c1082ae7088511f318aa9bd3d4b25ba3c6 Mon Sep 17 00:00:00 2001 From: Piotr Szarmanski Date: Fri, 30 Dec 2022 18:29:24 +0100 Subject: Implement buffer reuse encoding. This implements a buffer reuse mechanism in eris-encode. This is also a backwards-incompatible change, as the provided OUTPUT-FUNCTION now has an additional argument and has to return an octet-vector buffer of equal size. This is not yet implemented optimally, but should amount to a reduction in memory usage and in the GC work required, especially for larger files. --- src/eris.lisp | 48 ++++++++++++++++++++++++++++-------------------- src/hash-backend.lisp | 2 +- tests/backend-tests.lisp | 4 ++-- tests/decode-tests.lisp | 3 ++- tests/encode-tests.lisp | 6 ++++++ tests/rfc.lisp | 3 ++- 6 files changed, 41 insertions(+), 25 deletions(-) diff --git a/src/eris.lisp b/src/eris.lisp index d86934b..ba04c03 100644 --- a/src/eris.lisp +++ b/src/eris.lisp @@ -169,11 +169,12 @@ versioning bytes are not supported by eris-cl." (defmacro output-block (ref-vector) `(let ((reference (compute-reference block))) (if hash-output - (unless (gethash reference *output-hashmap*) - (let ((rk (encrypt-block block secret reference))) - (vector-push-extend rk ,ref-vector) - (setf (gethash reference *output-hashmap*) t) - (funcall output-function block (reference rk)))) + (if (gethash reference *output-hashmap*) + block + (let ((rk (encrypt-block block secret reference))) + (vector-push-extend rk ,ref-vector) + (setf (gethash reference *output-hashmap*) t) + (funcall output-function block (reference rk)))) (let ((rk (encrypt-block block secret reference))) (vector-push-extend rk ,ref-vector) (funcall output-function block (reference rk)))))) @@ -212,13 +213,16 @@ guarantee that a reference is only output once.")) (setf input (pad input block-size)) (let ((reference-vector (make-array 16 :adjustable t :fill-pointer 0)) - (*output-hashmap* (if hash-output (make-hash-table :test #'equalp) nil))) - (loop for block = (make-array block-size 
:element-type 'octet :initial-element 0) - ;; then (output-block reference-vector) - for i = 0 then (incf i) + (*output-hashmap* (if hash-output (make-hash-table :test #'equalp) nil)) + (block (make-array block-size :element-type 'octet :initial-element 0))) + (declare (type octet-vector block)) + (loop for i = 0 then (incf i) until (= (length input) (* i block-size)) do (progn (replace block input :start2 (* i block-size)) - (setf block (output-block reference-vector)))) + (setf block (output-block reference-vector)) + (fill block 0))) + ;; always bzero the buffer; this is unoptimal (it only needs to be zeroed out to eliminate trailing junk) + ;; TODO: consider removing this entire function and replacing it with an octet stream (eris-create-tree reference-vector block-size output-function :hash-output hash-output))) (defmethod eris-encode ((input stream) block-size output-function &key (secret null-secret) hash-output) @@ -227,14 +231,15 @@ guarantee that a reference is only output once.")) (type function output-function) (type (octet-vector 32) secret)) (let ((reference-vector (make-array 16 :adjustable t :fill-pointer 0)) - (*output-hashmap* (if hash-output (make-hash-table :test #'equalp) nil))) - (loop for block = (make-array block-size :element-type 'octet :initial-element 0) - ;;then (output-block reference-vector) - for bytes-read = (read-sequence block input) + (*output-hashmap* (if hash-output (make-hash-table :test #'equalp) nil)) + (block (make-array block-size :element-type 'octet :initial-element 0))) + (declare (type octet-vector block)) + (loop for bytes-read = (read-sequence block input) for i = 0 then (incf i) if (< bytes-read block-size) - do (setf (aref block bytes-read) #x80) - do (setf block (output-block reference-vector)) + do (progn (setf (aref block bytes-read) #x80) + (fill block 0 :start (1+ bytes-read))) ;; bzero the buffer here to eliminate trailing junk + do (progn (setf block (output-block reference-vector))) until (< bytes-read 
block-size)) (eris-create-tree reference-vector block-size output-function :hash-output hash-output))) @@ -255,14 +260,17 @@ guarantee that a reference is only output once.")) ;; loop across the key-reference vector and build the tree (loop with block = (make-array block-size :element-type 'octet :initial-element 0) for rk across reference-vector - with i = 0 + with i = 0 when (eql i block-keys) - do (setf block (output-internal-block reference-vector-l nonce) - i 0) + do (progn (setf block (output-internal-block reference-vector-l nonce)) + (setf i 0) + (fill block 0)) do (progn (reference-pair-to-octets rk block (* 64 i)) (incf i)) finally (unless (zerop i) - ;; If i is zero, then the amount of blocks is just right. Otherwise add a final unfinished block. + ;; If i is zero, then the amount of blocks is just + ;; right. Otherwise add a final unfinished block. + (output-internal-block reference-vector-l nonce))) (setf reference-vector reference-vector-l) (setf reference-vector-l (make-array 16 :adjustable t :fill-pointer 0))))) diff --git a/src/hash-backend.lisp b/src/hash-backend.lisp index c44c2d0..0fa095a 100644 --- a/src/hash-backend.lisp +++ b/src/hash-backend.lisp @@ -32,7 +32,7 @@ output-function (lambda (block reference) (declare (type octet-vector block reference)) (setf (gethash reference hash-table) - block) + (copy-seq block)) block))))) (defmethod fetch-data (read-capability (backend hash-backend) &key &allow-other-keys) diff --git a/tests/backend-tests.lisp b/tests/backend-tests.lisp index 625740f..dc411d5 100644 --- a/tests/backend-tests.lisp +++ b/tests/backend-tests.lisp @@ -21,7 +21,7 @@ `(let ((backend (make-instance 'hash-backend)) (array ,array)) (is (equalp (alexandria:read-stream-content-into-byte-vector - (fetch-read-capability + (fetch-data (store-data array backend :block-size ,block-size :secret ,secret) backend)) array)))) @@ -49,7 +49,7 @@ (let* ((backend (make-instance 'file-backend :directory tmpdir)) (array ,array)) (is (equalp 
(alexandria:read-stream-content-into-byte-vector - (fetch-read-capability + (fetch-data (store-data array backend :secret ,secret) backend)) array))) (uiop:delete-directory-tree tmpdir :validate t)))) diff --git a/tests/decode-tests.lisp b/tests/decode-tests.lisp index 27ff4e3..5053d11 100644 --- a/tests/decode-tests.lisp +++ b/tests/decode-tests.lisp @@ -22,7 +22,8 @@ (defvar *stream* nil) (defun hashtable-encode (block ref) - (setf (gethash ref *table*) block)) + (setf (gethash ref *table*) (copy-seq block)) + block) (defun hashtable-decode (ref) (copy-seq (gethash ref *table*))) diff --git a/tests/encode-tests.lisp b/tests/encode-tests.lisp index 36cc435..abbeb0d 100644 --- a/tests/encode-tests.lisp +++ b/tests/encode-tests.lisp @@ -36,6 +36,12 @@ (check-urn (base32-to-bytes-unpadded "JBSWY3DPEB3W64TMMQQQ") 1024 "urn:eris:BIAD77QDJMFAKZYH2DXBUZYAP3MXZ3DJZVFYQ5DFWC6T65WSFCU5S2IT4YZGJ7AC4SYQMP2DM2ANS2ZTCP3DJJIRV733CRAAHOSWIYZM3M")) +(test empty-stream + (check-urn (serapeum:make-octet-vector 0) 1024 + "urn:eris:BIADFUKDPYKJNLGCVSIIDI3FVKND7MO5AGOCXBK2C4ITT5MAL4LSCZF62B4PDOFQCLLNL7AXXSJFGINUYXVGVTDCQ2V7S7W5S234WFXCJ4") + (check-urn (serapeum:make-octet-vector 0) eris:32kib + "urn:eris:B4AC3MKL2BYR3E2WPMY2QRA6QZBLY4VNWJEBTSK5KWD66BRIT2EXVQVWY6TWVKJCZLC66RE3T2PKWDU3TBAKZZZIZRBTMP6BSOPE4CRXII")) + ;; simple gray stream class for this particular construction. (defclass null-stream (fundamental-binary-input-stream) ((counter :initform 0 :accessor counter) diff --git a/tests/rfc.lisp b/tests/rfc.lisp index 15b993e..dafa086 100644 --- a/tests/rfc.lisp +++ b/tests/rfc.lisp @@ -24,7 +24,8 @@ (defun test-output (block ref) (assert (equalp block - (base32-to-bytes-unpadded (getf *alist* (intern (bytes-to-base32-unpadded ref) :keyword)))))) + (base32-to-bytes-unpadded (getf *alist* (intern (bytes-to-base32-unpadded ref) :keyword))))) + block) (defmacro positive-test (urn content block-alist secret block-size) `(let ((*alist* ,block-alist) -- cgit v1.2.3