From 855e79b7ffa37ac64de51defa18104ed897576b3 Mon Sep 17 00:00:00 2001 From: Piotr Szarmanski Date: Fri, 30 Dec 2022 09:00:15 +0100 Subject: Initial buffer reuse encoding commit --- src/backend.lisp | 2 +- src/eris.lisp | 45 ++++++++++++++++++++++----------------------- src/file-backend.lisp | 7 ++++--- src/hash-backend.lisp | 5 +++-- src/package.lisp | 2 +- tests/encode-tests.lisp | 6 +++--- 6 files changed, 34 insertions(+), 33 deletions(-) diff --git a/src/backend.lisp b/src/backend.lisp index 9bb00d0..0230b24 100644 --- a/src/backend.lisp +++ b/src/backend.lisp @@ -22,7 +22,7 @@ (defclass decoding-backend () ((fetch-function :type function))) -(defgeneric fetch-read-capability (read-capability backend &key &allow-other-keys) +(defgeneric fetch-data (read-capability backend &key &allow-other-keys) (:documentation "Using the BACKEND, return a stream that decodes the provided READ-CAPABILITY object.")) diff --git a/src/eris.lisp b/src/eris.lisp index 91200a1..d86934b 100644 --- a/src/eris.lisp +++ b/src/eris.lisp @@ -166,38 +166,37 @@ versioning bytes are not supported by eris-cl." (defvar *output-hashmap* nil) -(defmacro output-block (ref-vector &rest expr) +(defmacro output-block (ref-vector) `(let ((reference (compute-reference block))) (if hash-output (unless (gethash reference *output-hashmap*) (let ((rk (encrypt-block block secret reference))) (vector-push-extend rk ,ref-vector) - (funcall output-function block (reference rk)) - (setf (gethash reference *output-hashmap*) t))) + (setf (gethash reference *output-hashmap*) t) + (funcall output-function block (reference rk)))) (let ((rk (encrypt-block block secret reference))) (vector-push-extend rk ,ref-vector) - (funcall output-function block (reference rk)))) - ,@expr)) + (funcall output-function block (reference rk)))))) -(defmacro output-internal-block (ref-vector nonce &rest expr) +(defmacro output-internal-block (ref-vector nonce) `(let ((reference (compute-reference block))) (if hash-output (unless (gethash reference *output-hashmap*) (let ((rk (encrypt-internal-block block reference ,nonce))) (vector-push-extend rk ,ref-vector) - (funcall output-function block (reference rk)) - (setf (gethash reference *output-hashmap*) t))) + (setf (gethash reference *output-hashmap*) t) + (funcall output-function block (reference rk)))) (let ((rk (encrypt-internal-block block reference ,nonce))) (vector-push-extend rk ,ref-vector) - (funcall output-function block (reference rk)))) - ,@expr)) + (funcall output-function block (reference rk)))))) (defgeneric eris-encode (input block-size output-function &key secret hash-output) (:documentation "Encode an INPUT into BLOCK-SIZE (32kib or 1kib) blocks, that are output using the function OUTPUT-FUNCTION. This function wil be called with two arguments: an -encoded block and a 32-byte reference octet vector. Returns a read-capability -object. +encoded block and a 32-byte reference octet vector, and it MUST return +a (SIMPLE-ARRAY (UNSIGNED-BYTE 8)) of equal size to the one given, which will be +destructively modified. Returns a read-capability object. An optional 32-byte secret can be passed for additional encryption using the SECRET keyword argument. @@ -215,11 +214,11 @@ guarantee that a reference is only output once.")) (let ((reference-vector (make-array 16 :adjustable t :fill-pointer 0)) (*output-hashmap* (if hash-output (make-hash-table :test #'equalp) nil))) (loop for block = (make-array block-size :element-type 'octet :initial-element 0) + ;; then (output-block reference-vector) for i = 0 then (incf i) until (= (length input) (* i block-size)) - do (progn - (replace block input :start2 (* i block-size)) - (output-block reference-vector nil))) + do (progn (replace block input :start2 (* i block-size)) + (setf block (output-block reference-vector)))) (eris-create-tree reference-vector block-size output-function :hash-output hash-output))) (defmethod eris-encode ((input stream) block-size output-function &key (secret null-secret) hash-output) @@ -230,11 +229,12 @@ guarantee that a reference is only output once.")) (let ((reference-vector (make-array 16 :adjustable t :fill-pointer 0)) (*output-hashmap* (if hash-output (make-hash-table :test #'equalp) nil))) (loop for block = (make-array block-size :element-type 'octet :initial-element 0) + ;;then (output-block reference-vector) for bytes-read = (read-sequence block input) for i = 0 then (incf i) if (< bytes-read block-size) do (setf (aref block bytes-read) #x80) - do (output-block reference-vector nil) + do (setf block (output-block reference-vector)) until (< bytes-read block-size)) (eris-create-tree reference-vector block-size output-function :hash-output hash-output))) @@ -246,10 +246,10 @@ guarantee that a reference is only output once.")) with reference-vector-l = (make-array 16 :adjustable t :fill-pointer 0) for nonce = (make-nonce (1+ level)) when (eql (length reference-vector) 1) - do (return (make-instance 'read-capability - :reference-pair (aref reference-vector 0) - :level level - :block-size block-size)) + do (return (make-instance 'read-capability + :reference-pair (aref reference-vector 0) + :level level + :block-size block-size)) do (progn (incf level) ;; loop across the key-reference vector and build the tree @@ -257,9 +257,8 @@ guarantee that a reference is only output once.")) for rk across reference-vector with i = 0 when (eql i block-keys) - do (output-internal-block reference-vector-l nonce - (setf block (make-array block-size :element-type 'octet :initial-element 0) - i 0)) + do (setf block (output-internal-block reference-vector-l nonce) + i 0) do (progn (reference-pair-to-octets rk block (* 64 i)) (incf i)) finally (unless (zerop i) diff --git a/src/file-backend.lisp b/src/file-backend.lisp index 03e3ef7..1a1c5b0 100644 --- a/src/file-backend.lisp +++ b/src/file-backend.lisp @@ -51,9 +51,10 @@ (let* ((base32 (bytes-to-base32-unpadded reference)) (file (merge-pathnames directory base32))) (unless (probe-file file) - (alexandria:write-byte-vector-into-file block file))))))) + (alexandria:write-byte-vector-into-file block file))) + block)))) -(defmethod fetch-read-capability (read-capability (backend file-backend) &key &allow-other-keys) +(defmethod fetch-data (read-capability (backend file-backend) &key &allow-other-keys) (declare (type read-capability read-capability)) (with-slots (fetch-function) backend (eris-decode read-capability fetch-function))) @@ -65,7 +66,7 @@ (if (> (etypecase input (stream (file-length input)) (vector (length input))) - 16384) + (* 2 16384)) 32kib 1kib) output-function diff --git a/src/hash-backend.lisp b/src/hash-backend.lisp index 91dc673..c44c2d0 100644 --- a/src/hash-backend.lisp +++ b/src/hash-backend.lisp @@ -32,9 +32,10 @@ output-function (lambda (block reference) (declare (type octet-vector block reference)) (setf (gethash reference hash-table) - block)))))) + block) + block))))) -(defmethod fetch-read-capability (read-capability (backend hash-backend) &key &allow-other-keys) +(defmethod fetch-data (read-capability (backend hash-backend) &key &allow-other-keys) (declare (type read-capability read-capability)) (with-slots (fetch-function) backend (eris-decode read-capability fetch-function :cache-capacity nil))) diff --git a/src/package.lisp b/src/package.lisp index c618c75..b5c7a4a 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -47,7 +47,7 @@ #:missing-block #:store-data - #:fetch-read-capability + #:fetch-data #:encoding-backend #:decoding-backend #:file-backend diff --git a/tests/encode-tests.lisp b/tests/encode-tests.lisp index 214eaad..36cc435 100644 --- a/tests/encode-tests.lisp +++ b/tests/encode-tests.lisp @@ -21,13 +21,13 @@ (vector-encode (read-capability-to-urn (eris-encode ,data ,block-size - (lambda (&rest args) (declare (ignore args))) + (lambda (block ref) (declare (ignore ref)) block) :secret ,secret))) (stream-encode (read-capability-to-urn (with-octet-input-stream (stream ,data) (eris-encode stream ,block-size - (lambda (&rest args) (declare (ignore args))) + (lambda (block ref) (declare (ignore ref)) block) :secret ,secret))))) (is (equalp vector-encode urn)) (is (equalp stream-encode urn)))) @@ -60,7 +60,7 @@ ,key) :initialization-vector (make-array 8 :element-type '(unsigned-byte 8) :initial-element 0) :direction :input))) - (let ((read-capability (eris-encode chacha-stream ,block-size (lambda (&rest args) (declare (ignore args)))))) + (let ((read-capability (eris-encode chacha-stream ,block-size (lambda (block ref) (declare (ignore ref)) block)))) (is (equalp (read-capability-to-urn read-capability) ,urn))))) -- cgit v1.2.3 From 5afc44c1082ae7088511f318aa9bd3d4b25ba3c6 Mon Sep 17 00:00:00 2001 From: Piotr Szarmanski Date: Fri, 30 Dec 2022 18:29:24 +0100 Subject: Implement buffer reuse encoding. This implements a buffer reuse mechanism in eris-encode. This is also a backwards incompatible change, as the provided OUTPUT-FUNCTION now has an additional argument and has to return an octet-vector buffer of equal size. This is not yet implemented optimally, but should amount to a reduce of memory usage and GC required, especially for larger files. --- src/eris.lisp | 48 ++++++++++++++++++++++++++++-------------------- src/hash-backend.lisp | 2 +- tests/backend-tests.lisp | 4 ++-- tests/decode-tests.lisp | 3 ++- tests/encode-tests.lisp | 6 ++++++ tests/rfc.lisp | 3 ++- 6 files changed, 41 insertions(+), 25 deletions(-) diff --git a/src/eris.lisp b/src/eris.lisp index d86934b..ba04c03 100644 --- a/src/eris.lisp +++ b/src/eris.lisp @@ -169,11 +169,12 @@ versioning bytes are not supported by eris-cl." (defmacro output-block (ref-vector) `(let ((reference (compute-reference block))) (if hash-output - (unless (gethash reference *output-hashmap*) - (let ((rk (encrypt-block block secret reference))) - (vector-push-extend rk ,ref-vector) - (setf (gethash reference *output-hashmap*) t) - (funcall output-function block (reference rk)))) + (if (gethash reference *output-hashmap*) + block + (let ((rk (encrypt-block block secret reference))) + (vector-push-extend rk ,ref-vector) + (setf (gethash reference *output-hashmap*) t) + (funcall output-function block (reference rk)))) (let ((rk (encrypt-block block secret reference))) (vector-push-extend rk ,ref-vector) (funcall output-function block (reference rk)))))) @@ -212,13 +213,16 @@ guarantee that a reference is only output once.")) (setf input (pad input block-size)) (let ((reference-vector (make-array 16 :adjustable t :fill-pointer 0)) - (*output-hashmap* (if hash-output (make-hash-table :test #'equalp) nil))) - (loop for block = (make-array block-size :element-type 'octet :initial-element 0) - ;; then (output-block reference-vector) - for i = 0 then (incf i) + (*output-hashmap* (if hash-output (make-hash-table :test #'equalp) nil)) + (block (make-array block-size :element-type 'octet :initial-element 0))) + (declare (type octet-vector block)) + (loop for i = 0 then (incf i) until (= (length input) (* i block-size)) do (progn (replace block input :start2 (* i block-size)) - (setf block (output-block reference-vector)))) + (setf block (output-block reference-vector)) + (fill block 0))) + ;; always bzero the buffer; this is unoptimal (it only needs to be zeroed out to eliminate trailing junk) + ;; TODO: consider removing this entire function and replacing it with an octet stream (eris-create-tree reference-vector block-size output-function :hash-output hash-output))) (defmethod eris-encode ((input stream) block-size output-function &key (secret null-secret) hash-output) @@ -227,14 +231,15 @@ guarantee that a reference is only output once.")) (type function output-function) (type (octet-vector 32) secret)) (let ((reference-vector (make-array 16 :adjustable t :fill-pointer 0)) - (*output-hashmap* (if hash-output (make-hash-table :test #'equalp) nil))) - (loop for block = (make-array block-size :element-type 'octet :initial-element 0) - ;;then (output-block reference-vector) - for bytes-read = (read-sequence block input) + (*output-hashmap* (if hash-output (make-hash-table :test #'equalp) nil)) + (block (make-array block-size :element-type 'octet :initial-element 0))) + (declare (type octet-vector block)) + (loop for bytes-read = (read-sequence block input) for i = 0 then (incf i) if (< bytes-read block-size) - do (setf (aref block bytes-read) #x80) - do (setf block (output-block reference-vector)) + do (progn (setf (aref block bytes-read) #x80) + (fill block 0 :start (1+ bytes-read))) ;; bzero the buffer here to eliminate trailing junk + do (progn (setf block (output-block reference-vector))) until (< bytes-read block-size)) (eris-create-tree reference-vector block-size output-function :hash-output hash-output))) @@ -255,14 +260,17 @@ guarantee that a reference is only output once.")) ;; loop across the key-reference vector and build the tree (loop with block = (make-array block-size :element-type 'octet :initial-element 0) for rk across reference-vector - with i = 0 + with i = 0 when (eql i block-keys) - do (setf block (output-internal-block reference-vector-l nonce) - i 0) + do (progn (setf block (output-internal-block reference-vector-l nonce)) + (setf i 0) + (fill block 0)) do (progn (reference-pair-to-octets rk block (* 64 i)) (incf i)) finally (unless (zerop i) - ;; If i is zero, then the amount of blocks is just right. Otherwise add a final unfinished block. + ;; If i is zero, then the amount of blocks is just + ;; right. Otherwise add a final unfinished block. + (output-internal-block reference-vector-l nonce))) (setf reference-vector reference-vector-l) (setf reference-vector-l (make-array 16 :adjustable t :fill-pointer 0))))) diff --git a/src/hash-backend.lisp b/src/hash-backend.lisp index c44c2d0..0fa095a 100644 --- a/src/hash-backend.lisp +++ b/src/hash-backend.lisp @@ -32,7 +32,7 @@ output-function (lambda (block reference) (declare (type octet-vector block reference)) (setf (gethash reference hash-table) - block) + (copy-seq block)) block))))) (defmethod fetch-data (read-capability (backend hash-backend) &key &allow-other-keys) diff --git a/tests/backend-tests.lisp b/tests/backend-tests.lisp index 625740f..dc411d5 100644 --- a/tests/backend-tests.lisp +++ b/tests/backend-tests.lisp @@ -21,7 +21,7 @@ `(let ((backend (make-instance 'hash-backend)) (array ,array)) (is (equalp (alexandria:read-stream-content-into-byte-vector - (fetch-read-capability + (fetch-data (store-data array backend :block-size ,block-size :secret ,secret) backend)) array)))) @@ -49,7 +49,7 @@ (let* ((backend (make-instance 'file-backend :directory tmpdir)) (array ,array)) (is (equalp (alexandria:read-stream-content-into-byte-vector - (fetch-read-capability + (fetch-data (store-data array backend :secret ,secret) backend)) array))) (uiop:delete-directory-tree tmpdir :validate t)))) diff --git a/tests/decode-tests.lisp b/tests/decode-tests.lisp index 27ff4e3..5053d11 100644 --- a/tests/decode-tests.lisp +++ b/tests/decode-tests.lisp @@ -22,7 +22,8 @@ (defvar *stream* nil) (defun hashtable-encode (block ref) - (setf (gethash ref *table*) block)) + (setf (gethash ref *table*) (copy-seq block)) + block) (defun hashtable-decode (ref) (copy-seq (gethash ref *table*))) diff --git a/tests/encode-tests.lisp b/tests/encode-tests.lisp index 36cc435..abbeb0d 100644 --- a/tests/encode-tests.lisp +++ b/tests/encode-tests.lisp @@ -36,6 +36,12 @@ (check-urn (base32-to-bytes-unpadded "JBSWY3DPEB3W64TMMQQQ") 1024 "urn:eris:BIAD77QDJMFAKZYH2DXBUZYAP3MXZ3DJZVFYQ5DFWC6T65WSFCU5S2IT4YZGJ7AC4SYQMP2DM2ANS2ZTCP3DJJIRV733CRAAHOSWIYZM3M")) +(test empty-stream + (check-urn (serapeum:make-octet-vector 0) 1024 + "urn:eris:BIADFUKDPYKJNLGCVSIIDI3FVKND7MO5AGOCXBK2C4ITT5MAL4LSCZF62B4PDOFQCLLNL7AXXSJFGINUYXVGVTDCQ2V7S7W5S234WFXCJ4") + (check-urn (serapeum:make-octet-vector 0) eris:32kib + "urn:eris:B4AC3MKL2BYR3E2WPMY2QRA6QZBLY4VNWJEBTSK5KWD66BRIT2EXVQVWY6TWVKJCZLC66RE3T2PKWDU3TBAKZZZIZRBTMP6BSOPE4CRXII")) + ;; simple gray stream class for this particular construction. (defclass null-stream (fundamental-binary-input-stream) ((counter :initform 0 :accessor counter) diff --git a/tests/rfc.lisp b/tests/rfc.lisp index 15b993e..dafa086 100644 --- a/tests/rfc.lisp +++ b/tests/rfc.lisp @@ -24,7 +24,8 @@ (defun test-output (block ref) (assert (equalp block - (base32-to-bytes-unpadded (getf *alist* (intern (bytes-to-base32-unpadded ref) :keyword)))))) + (base32-to-bytes-unpadded (getf *alist* (intern (bytes-to-base32-unpadded ref) :keyword))))) + block) (defmacro positive-test (urn content block-alist secret block-size) `(let ((*alist* ,block-alist) -- cgit v1.2.3