Skip to content

Commit e7bece4

Browse files
committed
#797 Promote field extraction method from Copybook class to the companion object.
1 parent 01da263 commit e7bece4

File tree

7 files changed

+46
-38
lines changed

7 files changed

+46
-38
lines changed

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class Copybook(val ast: CopybookAST) extends Logging with Serializable {
9090
def getFieldValueByName(fieldName: String, recordBytes: Array[Byte], startOffset: Int = 0): Any = {
9191
val primitive = getPrimitiveFieldByName(fieldName)
9292

93-
extractPrimitiveField(primitive, recordBytes, startOffset)
93+
getPrimitiveField(primitive, recordBytes, startOffset)
9494
}
9595

9696
/**
@@ -203,23 +203,6 @@ class Copybook(val ast: CopybookAST) extends Logging with Serializable {
203203
}
204204
}
205205

206-
/**
207-
* Get value of a field of the copybook record by the AST object of the field
208-
*
209-
* Nested field names can contain '.' to identify the exact field.
210-
* If the field name is unique '.' is not required.
211-
*
212-
* @param field The AST object of the field
213-
* @param bytes Binary encoded data of the record
214-
* @param startOffset An offset to the beginning of the field in the data (in bytes).
215-
* @return The value of the field
216-
*
217-
*/
218-
def extractPrimitiveField(field: Primitive, bytes: Array[Byte], startOffset: Int = 0): Any = {
219-
val slicedBytes = bytes.slice(field.binaryProperties.offset + startOffset, field.binaryProperties.offset + startOffset + field.binaryProperties.actualSize)
220-
field.decodeTypeValue(0, slicedBytes)
221-
}
222-
223206
/** This routine is used for testing: it generates record layout position information that can be compared with mainframe output. */
224207
def generateRecordLayoutPositions(): String = {
225208
var fieldCounter: Int = 0
@@ -431,6 +414,28 @@ object Copybook {
431414
new Copybook(schema)
432415
}
433416

417+
/**
418+
* Get value of a field of the copybook record by the AST object of the field
419+
*
420+
* The field is located in the record using the offset and the actual size stored in the binary properties
421+
* of the AST object, so no lookup by field name is performed.
422+
*
423+
* @param field The AST object of the field
424+
* @param bytes Binary encoded data of the record
425+
* @param startOffset An offset to the beginning of the field in the data (in bytes).
426+
* @return The value of the field
427+
*
428+
*/
429+
def getPrimitiveField(field: Primitive, bytes: Array[Byte], startOffset: Int = 0): Any = {
430+
val slicedBytes = bytes.slice(field.binaryProperties.offset + startOffset, field.binaryProperties.offset + startOffset + field.binaryProperties.actualSize)
431+
field.decodeTypeValue(0, slicedBytes)
432+
}
433+
434+
/** Same as getPrimitiveField(). The original method is left for backwards compatibility. */
435+
def extractPrimitiveField(field: Primitive, bytes: Array[Byte], startOffset: Int = 0): Any = {
436+
getPrimitiveField(field, bytes, startOffset)
437+
}
438+
434439
/**
435440
* Set value of a field of the copybook record by the AST object of the field
436441
*

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedWithRecordLengthExprRawRecordExtractor.scala

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package za.co.absa.cobrix.cobol.reader.extractors.raw
1818

1919
import org.slf4j.LoggerFactory
20+
import za.co.absa.cobrix.cobol.parser.Copybook
2021
import za.co.absa.cobrix.cobol.parser.ast.Primitive
2122
import za.co.absa.cobrix.cobol.reader.iterator.RecordLengthExpression
2223
import za.co.absa.cobrix.cobol.reader.parameters.ReaderParameters
@@ -123,7 +124,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
123124

124125
final private def getRecordLengthFromField(lengthAST: Primitive, binaryDataStart: Array[Byte]): Int = {
125126
val length = if (isLengthMapEmpty) {
126-
ctx.copybook.extractPrimitiveField(lengthAST, binaryDataStart, readerProperties.startOffset) match {
127+
Copybook.getPrimitiveField(lengthAST, binaryDataStart, readerProperties.startOffset) match {
127128
case i: Int => i
128129
case l: Long => l.toInt
129130
case s: String => Try{ s.toInt }.getOrElse(throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type, encountered: '$s'."))
@@ -132,7 +133,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
132133
case _ => throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type.")
133134
}
134135
} else {
135-
ctx.copybook.extractPrimitiveField(lengthAST, binaryDataStart, readerProperties.startOffset) match {
136+
Copybook.getPrimitiveField(lengthAST, binaryDataStart, readerProperties.startOffset) match {
136137
case i: Int => getRecordLengthFromMapping(i.toString)
137138
case l: Long => getRecordLengthFromMapping(l.toString)
138139
case d: BigDecimal => getRecordLengthFromMapping(d.toString())
@@ -165,7 +166,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
165166

166167
expr.fields.foreach{
167168
case (name, field) =>
168-
val obj = ctx.copybook.extractPrimitiveField(field, binaryDataStart, readerProperties.startOffset)
169+
val obj = Copybook.getPrimitiveField(field, binaryDataStart, readerProperties.startOffset)
169170
try {
170171
obj match {
171172
case i: Int => evaluator.setValue(name, i)
@@ -194,7 +195,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
194195

195196
private def getSegmentId(data: Array[Byte]): Option[String] = {
196197
segmentIdField.map(field => {
197-
val fieldValue = ctx.copybook.extractPrimitiveField(field, data, readerProperties.startOffset)
198+
val fieldValue = Copybook.getPrimitiveField(field, data, readerProperties.startOffset)
198199
if (fieldValue == null) {
199200
log.error(s"An unexpected null encountered for segment id at $byteIndex")
200201
""

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/index/IndexGenerator.scala

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,14 +99,14 @@ object IndexGenerator extends Logging {
9999
} else {
100100
if (isValid) {
101101
if (isReallyHierarchical && rootRecordId.isEmpty) {
102-
val curSegmentId = getSegmentId(copybook.get, segmentField.get, record)
102+
val curSegmentId = getSegmentId(segmentField.get, record)
103103
if ((curSegmentId.nonEmpty && rootSegmentIds.isEmpty)
104104
|| (rootSegmentIds.nonEmpty && rootSegmentIds.contains(curSegmentId))) {
105105
rootRecordId = curSegmentId
106106
}
107107
}
108108
if (canSplit && needSplit(recordsInChunk, bytesInChunk)) {
109-
if (!isReallyHierarchical || isSegmentGoodForSplit(rootSegmentIds, copybook.get, segmentField.get, record)) {
109+
if (!isReallyHierarchical || isSegmentGoodForSplit(rootSegmentIds, segmentField.get, record)) {
110110
val indexEntry = SparseIndexEntry(byteIndex, -1, fileId, recordIndex)
111111
val len = index.length
112112
// Do not add an entry if we are still at the same position as the previous entry.
@@ -157,15 +157,14 @@ object IndexGenerator extends Logging {
157157
}
158158

159159
private def isSegmentGoodForSplit(rootSegmentIds: List[String],
160-
copybook: Copybook,
161160
segmentField: Primitive,
162161
record: Array[Byte]): Boolean = {
163-
val segmentId = getSegmentId(copybook, segmentField, record)
162+
val segmentId = getSegmentId(segmentField, record)
164163
rootSegmentIds.contains(segmentId)
165164
}
166165

167-
private def getSegmentId(copybook: Copybook, segmentIdField: Primitive, data: Array[Byte]): String = {
168-
val v = copybook.extractPrimitiveField(segmentIdField, data)
166+
private def getSegmentId(segmentIdField: Primitive, data: Array[Byte]): String = {
167+
val v = Copybook.getPrimitiveField(segmentIdField, data)
169168
if (v == null) "" else v.toString.trim
170169
}
171170
}

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/iterator/FixedLenNestedRowIterator.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package za.co.absa.cobrix.cobol.reader.iterator
1818

1919
import za.co.absa.cobrix.cobol.internal.Logging
20+
import za.co.absa.cobrix.cobol.parser.Copybook
2021
import za.co.absa.cobrix.cobol.reader.extractors.record.{RecordExtractors, RecordHandler}
2122
import za.co.absa.cobrix.cobol.reader.parameters.{CorruptFieldsPolicy, ReaderParameters}
2223
import za.co.absa.cobrix.cobol.reader.schema.CobolSchema
@@ -108,7 +109,7 @@ class FixedLenNestedRowIterator[T: ClassTag](
108109

109110
private def getSegmentId(data: Array[Byte], offset: Int): Option[String] = {
110111
segmentIdField.map(field => {
111-
val fieldValue = cobolSchema.copybook.extractPrimitiveField(field, data, offset)
112+
val fieldValue = Copybook.getPrimitiveField(field, data, offset)
112113
if (fieldValue == null) {
113114
logger.error(s"An unexpected null encountered for segment id at $byteIndex")
114115
""

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/iterator/VRLRecordReader.scala

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,9 @@ package za.co.absa.cobrix.cobol.reader.iterator
1818

1919
import za.co.absa.cobrix.cobol.internal.Logging
2020
import za.co.absa.cobrix.cobol.parser.Copybook
21-
import za.co.absa.cobrix.cobol.parser.ast.Primitive
2221
import za.co.absa.cobrix.cobol.parser.headerparsers.RecordHeaderParser
23-
import za.co.absa.cobrix.cobol.reader.parameters.ReaderParameters
2422
import za.co.absa.cobrix.cobol.reader.extractors.raw.RawRecordExtractor
23+
import za.co.absa.cobrix.cobol.reader.parameters.ReaderParameters
2524
import za.co.absa.cobrix.cobol.reader.stream.SimpleStream
2625
import za.co.absa.cobrix.cobol.reader.validator.ReaderParametersValidator
2726

@@ -143,7 +142,7 @@ class VRLRecordReader(cobolSchema: Copybook,
143142

144143
private def getSegmentId(data: Array[Byte]): Option[String] = {
145144
segmentIdField.map(field => {
146-
val fieldValue = cobolSchema.extractPrimitiveField(field, data, readerProperties.startOffset)
145+
val fieldValue = Copybook.getPrimitiveField(field, data, readerProperties.startOffset)
147146
if (fieldValue == null) {
148147
logger.error(s"An unexpected null encountered for segment id at $byteIndex")
149148
""

cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@
1717
package za.co.absa.cobrix.cobol.parser.extract
1818

1919
import org.scalatest.funsuite.AnyFunSuite
20-
import za.co.absa.cobrix.cobol.parser.CopybookParser
2120
import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, CobolType}
2221
import za.co.absa.cobrix.cobol.parser.ast.{BinaryProperties, Group, Primitive}
2322
import za.co.absa.cobrix.cobol.parser.decoders.DecoderSelector
2423
import za.co.absa.cobrix.cobol.parser.encoding.{EBCDIC, EncoderSelector}
2524
import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy
25+
import za.co.absa.cobrix.cobol.parser.{Copybook, CopybookParser}
2626

2727
class BinaryExtractorSpec extends AnyFunSuite {
2828

@@ -113,15 +113,15 @@ class BinaryExtractorSpec extends AnyFunSuite {
113113
0x00.toByte, 0x00.toByte, 0x2F.toByte
114114
)
115115

116-
val copybook = CopybookParser.parseTree(copyBookContents)
116+
val copybook: Copybook = CopybookParser.parseTree(copyBookContents)
117117
val startOffset: Int = 0
118118

119119
test("Test extract primitive field") {
120120

121121
// using getFieldByName
122122
val statement = copybook.getFieldByName("ID")
123123
val field: Primitive = statement.asInstanceOf[Primitive]
124-
val result: Any = copybook.extractPrimitiveField(field, bytes, startOffset)
124+
val result: Any = Copybook.getPrimitiveField(field, bytes, startOffset)
125125
assert(result.asInstanceOf[Int] === 6)
126126

127127
// traverse AST and extract all primitives to map
@@ -130,7 +130,7 @@ class BinaryExtractorSpec extends AnyFunSuite {
130130
def traverseAst(group: Group): Unit = {
131131
for (child <- group.children) {
132132
if (child.isInstanceOf[Primitive]) {
133-
extractedData += (child.name -> copybook.extractPrimitiveField(child.asInstanceOf[Primitive],
133+
extractedData += (child.name -> Copybook.extractPrimitiveField(child.asInstanceOf[Primitive],
134134
bytes, startOffset))
135135
} else {
136136
assert(child.isInstanceOf[Group] === true)
@@ -162,7 +162,7 @@ class BinaryExtractorSpec extends AnyFunSuite {
162162

163163
val primitive: Primitive = Primitive(level, name, name, lineNumber, dataType, redefines, isRedefined,
164164
occurs, to, dependingOn, Map(), isDependee, isFiller, DecoderSelector.getDecoder(dataType), EncoderSelector.getEncoder(dataType), binaryProperties)(None)
165-
val result2: Any = copybook.extractPrimitiveField(primitive, bytes, startOffset)
165+
val result2: Any = Copybook.extractPrimitiveField(primitive, bytes, startOffset)
166166
assert(result2.asInstanceOf[String] === "EXAMPLE4")
167167
}
168168

spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/writer/NestedRecordCombiner.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ object NestedRecordCombiner {
278278
}
279279
}
280280
}.getOrElse {
281-
// Dependee fields need not to be defines in Spark schema.
281+
// Dependee fields need not be defined in Spark schema.
282282
if (p.isDependee) {
283283
PrimitiveDependeeField(addDependee())
284284
} else {
@@ -369,6 +369,9 @@ object NestedRecordCombiner {
369369

370370
// ── Primitive which has an OCCURS DEPENDS ON ─────────────────────────────
371371
case PrimitiveDependeeField(spec) =>
372+
// NOTE: baseOffset is mutated here for each row. This is safe because rows
373+
// are processed sequentially within mapPartitions, and the offset is always
374+
// updated before being read in subsequent array-element writes.
372375
spec.baseOffset = currentOffset
373376
spec.cobolField.binaryProperties.actualSize
374377

0 commit comments

Comments
 (0)