Skip to content

Commit 11cfa5a

Browse files
author
Marco De Salvo
committed
Better comments
1 parent 6d7def1 commit 11cfa5a

File tree

3 files changed

+86
-107
lines changed

3 files changed

+86
-107
lines changed

RDFSharp/Model/Serializers/Turtle/TurtleStreamBuffer.cs renamed to RDFSharp/Model/Serializers/Turtle/BufferedStreamReader.cs

Lines changed: 50 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -20,29 +20,32 @@ limitations under the License.
2020
namespace RDFSharp.Model
2121
{
2222
/// <summary>
23-
/// Buffer scorrevole per la lettura efficiente di stream di grandi dimensioni durante il parsing Turtle
23+
/// BufferedStreamReader wraps a StreamReader with a sliding buffer for efficient reading of large streams
2424
/// </summary>
25-
internal class TurtleStreamBuffer : IDisposable
25+
internal class BufferedStreamReader : IDisposable
2626
{
27+
#region Fields
2728
private readonly StreamReader _reader;
2829
private readonly char[] _buffer;
2930
private readonly int _bufferSize;
3031

31-
private int _bufferStart; // Posizione assoluta nel file del primo carattere nel buffer
32-
private int _bufferLength; // Numero di caratteri validi nel buffer
32+
private int _bufferStart; // Absolute file position of the first character in the buffer
33+
private int _bufferLength; // Number of valid characters in the buffer
3334
private bool _endOfStream;
35+
#endregion
3436

35-
/// <summary>
36-
/// Posizione corrente nel file
37-
/// </summary>
38-
public int Position { get; set; }
37+
#region Properties
38+
internal int Position { get; set; }
39+
40+
private bool IsEndOfFile
41+
=> _endOfStream && Position >= _bufferStart + _bufferLength;
42+
#endregion
3943

44+
#region Ctors
4045
/// <summary>
41-
/// Indica se abbiamo raggiunto la fine del file
46+
/// Builds a BufferedStreamReader wrapping the given StreamReader with the given size of buffer
4247
/// </summary>
43-
public bool IsEndOfFile => _endOfStream && Position >= _bufferStart + _bufferLength;
44-
45-
public TurtleStreamBuffer(StreamReader reader, int bufferSize = 8192)
48+
public BufferedStreamReader(StreamReader reader, int bufferSize=8192)
4649
{
4750
_reader = reader ?? throw new ArgumentNullException(nameof(reader));
4851
_bufferSize = bufferSize;
@@ -52,29 +55,39 @@ public TurtleStreamBuffer(StreamReader reader, int bufferSize = 8192)
5255
Position = 0;
5356
_endOfStream = false;
5457

55-
// Carica il primo blocco
5658
FillBuffer();
5759
}
60+
#endregion
5861

62+
#region Interfaces
5963
/// <summary>
60-
/// Legge il prossimo code point Unicode
64+
/// Disposes the BufferedStreamReader
65+
/// </summary>
66+
public void Dispose()
67+
=> _reader?.Dispose();
68+
#endregion
69+
70+
#region Methods
71+
/// <summary>
72+
/// Reads the next Unicode code point, or returns -1 at the end of the stream
6173
/// </summary>
6274
public int ReadCodePoint()
6375
{
6476
if (IsEndOfFile)
6577
return -1;
6678

67-
// Assicurati che il carattere corrente sia nel buffer
79+
// Ensure that the current character is in the buffer
80+
// and, if possible, feed the buffer with new data
6881
EnsureBufferContainsPosition();
6982

83+
// If the position is still beyond the buffered data after refilling, the stream is exhausted
7084
if (Position >= _bufferStart + _bufferLength)
7185
return -1; // EOF
72-
7386
int bufferIndex = Position - _bufferStart;
7487
char highSurrogate = _buffer[bufferIndex];
7588
Position++;
7689

77-
// Gestione surrogate pairs per caratteri Unicode supplementari
90+
// Handle a possible surrogate pair (the char just read may be its high half)
7891
if (char.IsHighSurrogate(highSurrogate))
7992
{
8093
EnsureBufferContainsPosition();
@@ -94,99 +107,84 @@ public int ReadCodePoint()
94107
}
95108

96109
/// <summary>
97-
/// Sbircia il prossimo code point senza avanzare la posizione
110+
/// Peeks at the next code point without advancing the position
98111
/// </summary>
99112
public int PeekCodePoint()
100113
{
101114
int currentPos = Position;
102115
int codePoint = ReadCodePoint();
103-
Position = currentPos; // Ripristina posizione
116+
Position = currentPos; // Restore the position
104117
return codePoint;
105118
}
106119

107120
/// <summary>
108-
/// Torna indietro di un code point
121+
/// Goes back by one code point (2 positions when it is encoded as a surrogate pair, 1 otherwise)
109122
/// </summary>
110123
public void UnreadCodePoint(int codePoint)
111124
{
112125
if (codePoint == -1)
113126
return;
114127

115-
if (IsSupplementaryCodePoint(codePoint))
116-
{
117-
// Carattere supplementare (surrogate pair) - torna indietro di 2 posizioni
128+
// Supplementary code point (above char.MaxValue, encoded in UTF-16 as a surrogate pair): move back 2 positions
129+
if ((codePoint & ~char.MaxValue) != 0)
118130
Position = Math.Max(0, Position - 2);
119-
}
131+
132+
// Normal character: move back 1 position
120133
else
121-
{
122-
// Carattere normale - torna indietro di 1 posizione
123134
Position = Math.Max(0, Position - 1);
124-
}
125135
}
126136

127137
/// <summary>
128-
/// Torna indietro per una stringa di caratteri
138+
/// Moves the position back over the given string of characters
129139
/// </summary>
130140
public void UnreadString(string str)
131141
{
132142
if (string.IsNullOrEmpty(str))
133143
return;
134144

135-
// Torna indietro carattere per carattere (dal fondo)
145+
// Go back character by character (starting from the last)
136146
for (int i = str.Length - 1; i >= 0; i--)
137-
{
138147
UnreadCodePoint(str[i]);
139-
}
140148
}
141149

142150
/// <summary>
143-
/// Assicura che il buffer contenga la posizione corrente
151+
/// Ensures that the buffer contains the current position
144152
/// </summary>
145153
private void EnsureBufferContainsPosition()
146154
{
147-
// Se la posizione è oltre la fine del buffer corrente, carica nuovo buffer
155+
// If position is beyond the end of the current buffer, load a new buffer
148156
if (Position >= _bufferStart + _bufferLength && !_endOfStream)
149-
{
150157
FillBuffer();
151-
}
152-
// Se la posizione è prima dell'inizio del buffer, dobbiamo gestire backward seek
158+
159+
// If position is before the start of the buffer, we need to handle backward seek
153160
else if (Position < _bufferStart)
154-
{
155-
// Per semplicità, ricarica dall'inizio (può essere ottimizzato se necessario)
156161
ReloadFromPosition(Position);
157-
}
158162
}
159163

160164
/// <summary>
161-
/// Riempie il buffer dalla posizione corrente
165+
/// Fills the buffer from the current position
162166
/// </summary>
163167
private void FillBuffer()
164168
{
165169
if (_endOfStream)
166170
return;
167171

168-
// Se siamo alla fine del buffer corrente, shift del buffer
172+
// If we are at the end of the current buffer, shift the file position of the buffer
169173
if (Position >= _bufferStart + _bufferLength)
170-
{
171174
_bufferStart = Position;
172-
}
173175

174-
// Leggi il prossimo blocco
176+
// Read the next block of data from the stream
175177
_bufferLength = _reader.Read(_buffer, 0, _bufferSize);
176-
177178
if (_bufferLength == 0)
178-
{
179179
_endOfStream = true;
180-
}
181180
}
182181

183182
/// <summary>
184-
/// Ricarica il buffer da una posizione specifica (per backward seeks)
183+
/// Reloads the buffer from a specific location (for backward seeks)
185184
/// </summary>
186185
private void ReloadFromPosition(int position)
187186
{
188-
// Questo è un caso limite - per semplicità resettiamo lo stream
189-
// In un'implementazione più sofisticata si potrebbe mantenere un buffer circolare
187+
// Rewind the stream to its beginning, then read forward until the given position is reached
190188
if (_reader.BaseStream.CanSeek)
191189
{
192190
_reader.BaseStream.Seek(0, SeekOrigin.Begin);
@@ -195,28 +193,16 @@ private void ReloadFromPosition(int position)
195193
Position = 0;
196194
_endOfStream = false;
197195

198-
// Avanza fino alla posizione desiderata
196+
// Advance to the desired position
199197
while (Position < position && !IsEndOfFile)
200-
{
201198
ReadCodePoint();
202-
}
203199
Position = position;
204200
}
205201
else
206202
{
207203
throw new InvalidOperationException("Cannot seek backward on non-seekable stream");
208204
}
209205
}
210-
211-
/// <summary>
212-
/// Determina se il code point richiede surrogate pair
213-
/// </summary>
214-
private static bool IsSupplementaryCodePoint(int codePoint)
215-
=> (codePoint & ~char.MaxValue) != 0;
216-
217-
public void Dispose()
218-
{
219-
_reader?.Dispose();
220-
}
206+
#endregion
221207
}
222208
}

0 commit comments

Comments
 (0)