librsync  2.3.4
scoop.c
Go to the documentation of this file.
1/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2 *
3 * librsync -- the library for network deltas
4 *
5 * Copyright (C) 2000, 2001 by Martin Pool <mbp@sourcefrog.net>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public License
9 * as published by the Free Software Foundation; either version 2.1 of
10 * the License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21
22 /*=
23 | To walk on water you've gotta sink
24 | in the ice.
25 | -- Shihad, `The General Electric'.
26 */
27
28/** \file scoop.c
29 * This file deals with readahead from caller-supplied buffers.
30 *
31 * Many functions require a certain minimum amount of contiguous input data to
32 * do their processing. For example, to calculate a strong checksum of a block
33 * we need at least a block of input.
34 *
35 * Since we put the buffers completely under the control of the caller, we
36 * can't count on ever getting this much data all in one go. We can't simply
37 * wait, because the caller might have a smaller buffer than we require and so
38 * we'll never get it.
39 *
40 * Stream input data is used directly if there is sufficient data to satisfy
41 * the readahead requests, otherwise it is copied and accumulated into an
42 * internal buffer until there is enough. This means for large input buffers we
43 * can leave a "tail" of unprocessed data in the input buffer, and only consume
44 * all the data if it was too small and start accumulating into the internal
45 * buffer. Provided the input buffers always have enough data we avoid copying
46 * into the internal buffer at all.
47 *
48 * \todo We probably know a maximum amount of data that can be scooped up, so
49 * we could just avoid dynamic allocation. However that can't be fixed at
50 * compile time, because when generating a delta it needs to be large enough to
51 * hold one full block. Perhaps we can set it up when the job is allocated? It
52 * would be kind of nice to not do any memory allocation after startup, as
53 * bzlib does this. */
54
55#include "config.h" /* IWYU pragma: keep */
56#include <assert.h>
57#include <stdlib.h>
58#include <string.h>
59#include "librsync.h"
60#include "job.h"
61#include "scoop.h"
62#include "trace.h"
63#include "util.h"
64
65/** Try to accept a from the input buffer to get LEN bytes in the scoop. */
66static inline void rs_scoop_input(rs_job_t *job, size_t len)
67{
68 rs_buffers_t *stream = job->stream;
69 size_t tocopy;
70
71 assert(len > job->scoop_avail);
72
73 if (job->scoop_alloc < len) {
74 /* Need to allocate a larger scoop. */
75 rs_byte_t *newbuf;
76 size_t newsize;
77 for (newsize = 64; newsize < len; newsize <<= 1) ;
78 newbuf = rs_alloc(newsize, "scoop buffer");
79 if (job->scoop_avail)
80 memcpy(newbuf, job->scoop_next, job->scoop_avail);
81 if (job->scoop_buf)
82 free(job->scoop_buf);
83 job->scoop_buf = job->scoop_next = newbuf;
84 rs_trace("resized scoop buffer to " FMT_SIZE " bytes from " FMT_SIZE "",
85 newsize, job->scoop_alloc);
86 job->scoop_alloc = newsize;
87 } else if (job->scoop_buf + job->scoop_alloc < job->scoop_next + len) {
88 /* Move existing data to the front of the scoop. */
89 rs_trace("moving scoop " FMT_SIZE " bytes to reuse " FMT_SIZE " bytes",
90 job->scoop_avail, (size_t)(job->scoop_next - job->scoop_buf));
91 memmove(job->scoop_buf, job->scoop_next, job->scoop_avail);
92 job->scoop_next = job->scoop_buf;
93 }
94 /* take as much input as is available, to give up to LEN bytes in the
95 scoop. */
96 tocopy = len - job->scoop_avail;
97 if (tocopy > stream->avail_in)
98 tocopy = stream->avail_in;
99 assert(job->scoop_next + tocopy + job->scoop_avail <=
100 job->scoop_buf + job->scoop_alloc);
101
102 memcpy(job->scoop_next + job->scoop_avail, stream->next_in, tocopy);
103 rs_trace("accepted " FMT_SIZE " bytes from input to scoop", tocopy);
104 job->scoop_avail += tocopy;
105 stream->next_in += tocopy;
106 stream->avail_in -= tocopy;
107}
108
109/** Advance the input cursor forward \p len bytes.
110 *
111 * This is used after doing readahead, when you decide you want to keep it. \p
112 * len must be no more than the amount of available data, so you can't cheat.
113 *
114 * So when creating a delta, we require one block of readahead. But after
115 * examining that block, we might decide to advance over all of it (if there is
116 * a match), or just one byte (if not). */
117void rs_scoop_advance(rs_job_t *job, size_t len)
118{
119 rs_buffers_t *stream = job->stream;
120
121 /* It never makes sense to advance over a mixture of bytes from the scoop
122 and input, because you couldn't possibly have looked at them all at the
123 same time. */
124 if (job->scoop_avail) {
125 /* reading from the scoop buffer */
126 rs_trace("advance over " FMT_SIZE " bytes from scoop", len);
127 assert(len <= job->scoop_avail);
128 job->scoop_avail -= len;
129 job->scoop_next += len;
130 } else {
131 rs_trace("advance over " FMT_SIZE " bytes from input buffer", len);
132 assert(len <= stream->avail_in);
133 stream->avail_in -= len;
134 stream->next_in += len;
135 }
136}
137
138/** Read from scoop without advancing.
139 *
140 * Ask for LEN bytes of input from the stream. If that much data is available,
141 * then return a pointer to it in PTR, advance the stream input pointer over
142 * the data, and return RS_DONE. If there's not enough data, then accept
143 * whatever is there into a buffer, advance over it, and return RS_BLOCKED.
144 *
145 * The data is not actually removed from the input, so this function lets you
146 * do readahead. If you want to keep any of the data, you should also call
147 * rs_scoop_advance() to skip over it. */
148rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr)
149{
150 rs_buffers_t *stream = job->stream;
151 rs_job_check(job);
152
153 if (!job->scoop_avail && stream->avail_in >= len) {
154 /* The scoop is empty and there's enough data in the input. */
155 *ptr = stream->next_in;
156 rs_trace("got " FMT_SIZE " bytes direct from input", len);
157 return RS_DONE;
158 } else if (job->scoop_avail < len && stream->avail_in) {
159 /* There is not enough data in the scoop. */
160 rs_trace("scoop has less than " FMT_SIZE " bytes, scooping from "
161 FMT_SIZE " input bytes", len, stream->avail_in);
162 rs_scoop_input(job, len);
163 }
164 if (job->scoop_avail >= len) {
165 /* There is enough data in the scoop now. */
166 rs_trace("scoop has at least " FMT_SIZE " bytes, this is enough",
167 job->scoop_avail);
168 *ptr = job->scoop_next;
169 return RS_DONE;
170 } else if (stream->eof_in) {
171 /* Not enough input data and at EOF. */
172 rs_trace("reached end of input stream");
173 return RS_INPUT_ENDED;
174 } else {
175 /* Not enough input data yet. */
176 rs_trace("blocked with insufficient input data");
177 return RS_BLOCKED;
178 }
179}
180
181/** Read LEN bytes if possible, and remove them from the input scoop.
182 *
183 * \param *job An rs_job_t pointer to the job instance.
184 *
185 * \param len The length of the data in the ptr buffer.
186 *
187 * \param **ptr will be updated to point to a read-only buffer holding the
188 * data, if enough is available.
189 *
190 * \return RS_DONE if there was enough data, RS_BLOCKED if there was not enough
191 * data yet, or RS_INPUT_ENDED if there was not enough data and at EOF. */
192rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr)
193{
194 rs_result result;
195
196 result = rs_scoop_readahead(job, len, ptr);
197 if (result == RS_DONE)
198 rs_scoop_advance(job, len);
199 return result;
200}
201
202/** Read whatever data remains in the input stream.
203 *
204 * \param *job The rs_job_t instance the job instance.
205 *
206 * \param *len will be updated to the length of the available data.
207 *
208 * \param **ptr will point at the available data.
209 *
210 * \return RS_DONE if there was data, RS_INPUT_ENDED if there was no data and
211 * at EOF, RS_BLOCKED if there was no data and not at EOF. */
212rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr)
213{
214 *len = rs_scoop_avail(job);
215 if (*len)
216 return rs_scoop_read(job, *len, ptr);
217 else if (job->stream->eof_in)
218 return RS_INPUT_ENDED;
219 else
220 return RS_BLOCKED;
221}
Generic state-machine interface.
#define rs_job_check(job)
Assert that a job is valid.
Definition: job.h:130
Public header for librsync.
rs_result
Return codes from nonblocking rsync operations.
Definition: librsync.h:180
@ RS_DONE
Completed successfully.
Definition: librsync.h:181
@ RS_INPUT_ENDED
Unexpected end of input file, perhaps due to a truncated file or dropped network connection.
Definition: librsync.h:190
@ RS_BLOCKED
Blocked waiting for more data.
Definition: librsync.h:182
rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr)
Read from scoop without advancing.
Definition: scoop.c:148
static void rs_scoop_input(rs_job_t *job, size_t len)
Try to accept a from the input buffer to get LEN bytes in the scoop.
Definition: scoop.c:66
rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr)
Read LEN bytes if possible, and remove them from the input scoop.
Definition: scoop.c:192
void rs_scoop_advance(rs_job_t *job, size_t len)
Advance the input cursor forward len bytes.
Definition: scoop.c:117
rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr)
Read whatever data remains in the input stream.
Definition: scoop.c:212
Manage librsync streams of IO.
Description of input and output buffers.
Definition: librsync.h:328
char * next_in
Next input byte.
Definition: librsync.h:334
size_t avail_in
Number of bytes available at next_in.
Definition: librsync.h:342
int eof_in
True if there is no more data after this.
Definition: librsync.h:345
The contents of this structure are private.
Definition: job.h:47
rs_byte_t * scoop_buf
Buffer of data in the scoop.
Definition: job.h:97
size_t scoop_alloc
The buffer allocation size.
Definition: job.h:99
rs_byte_t * scoop_next
The next data pointer.
Definition: job.h:98
size_t scoop_avail
The amount of data available.
Definition: job.h:100
logging functions.
Misc utility functions used by librsync.