|
5 | 5 | To add http.almaren dependency to your sbt build: |
6 | 6 |
|
7 | 7 | ``` |
8 | | -libraryDependencies += "com.github.music-of-the-ainur" %% "http-almaren" % "1.2.8-3.4" |
| 8 | +libraryDependencies += "com.github.music-of-the-ainur" %% "http-almaren" % "1.2.9-3.4" |
9 | 9 | ``` |
10 | 10 |
|
11 | 11 | To run in spark-shell: |
12 | 12 |
|
13 | 13 | ``` |
14 | | -spark-shell --master "local[*]" --packages "com.github.music-of-the-ainur:almaren-framework_2.12:0.9.9-3.4,com.github.music-of-the-ainur:http-almaren_2.12:1.2.8-3.4" |
| 14 | +spark-shell --master "local[*]" --packages "com.github.music-of-the-ainur:almaren-framework_2.12:0.9.10-3.4,com.github.music-of-the-ainur:http-almaren_2.12:1.2.9-3.4" |
15 | 15 | ``` |
16 | 16 |
|
17 | 17 | ## Table of Contents |
@@ -43,14 +43,14 @@ repository. |
43 | 43 |
|
44 | 44 | | versions | Connector Artifact | |
45 | 45 | |----------------------------|-------------------------------------------------------------| |
46 | | -| Spark 3.4.x and scala 2.13 | `com.github.music-of-the-ainur:http-almaren_2.13:1.2.8-3.4` | |
47 | | -| Spark 3.4.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.8-3.4` | |
48 | | -| Spark 3.3.x and scala 2.13 | `com.github.music-of-the-ainur:http-almaren_2.13:1.2.8-3.3` | |
49 | | -| Spark 3.3.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.8-3.3` | |
50 | | -| Spark 3.2.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.8-3.2` | |
51 | | -| Spark 3.1.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.8-3.1` | |
52 | | -| Spark 2.4.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.8-2.4` | |
53 | | -| Spark 2.4.x and scala 2.11 | `com.github.music-of-the-ainur:http-almaren_2.11:1.2.8-2.4` | |
| 46 | +| Spark 3.4.x and scala 2.13 | `com.github.music-of-the-ainur:http-almaren_2.13:1.2.9-3.4` | |
| 47 | +| Spark 3.4.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.9-3.4` | |
| 48 | +| Spark 3.3.x and scala 2.13 | `com.github.music-of-the-ainur:http-almaren_2.13:1.2.9-3.3` | |
| 49 | +| Spark 3.3.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.9-3.3` | |
| 50 | +| Spark 3.2.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.9-3.2` | |
| 51 | +| Spark 3.1.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.9-3.1` | |
| 52 | +| Spark 2.4.x and scala 2.12 | `com.github.music-of-the-ainur:http-almaren_2.12:1.2.9-2.4` | |
| 53 | +| Spark 2.4.x and scala 2.11 | `com.github.music-of-the-ainur:http-almaren_2.11:1.2.9-2.4` | |
54 | 54 |
|
55 | 55 | ## Methods |
56 | 56 |
|
@@ -501,118 +501,3 @@ How to concatenate by new line: |
501 | 501 | (rows: Seq[Row]) => rows.map(row => row.getAs[String](Alias.DataCol)).mkString("\n") |
502 | 502 | ``` |
503 | 503 |
|
504 | | -### HTTP Row |
505 | | - |
506 | | -It will initiate an HTTP request for each Row, extracting headers, parameters, and hidden parameters from each Row. |
507 | | - |
508 | | -``` |
509 | | -$ curl -X PUT -H "Authorization: {SESSION_ID}" \ |
510 | | --H "Content-Type: text/csv" \ |
511 | | --H "Accept: text/csv" \ |
512 | | ---data-binary @"filename" \ |
513 | | -https://localhost/objects/documents/batch |
514 | | -``` |
515 | | - |
516 | | -#### Example |
517 | | - |
518 | | -```scala |
519 | | - import com.github.music.of.the.ainur.almaren.Almaren |
520 | | -import com.github.music.of.the.ainur.almaren.builder.Core.Implicit |
521 | | -import com.github.music.of.the.ainur.almaren.http.HTTPConn.HTTPImplicit |
522 | | -import org.apache.spark.sql.Row |
523 | | -import org.apache.spark.sql.types.{MapType, StringType, StructField, StructType} |
524 | | -import scala.collection.JavaConverters.asScalaIteratorConverter |
525 | | -import spark.implicits._ |
526 | | - |
527 | | -val almaren = Almaren("http-almaren") |
528 | | - |
529 | | -val df = Seq( |
530 | | - ("John", "Smith", "London"), |
531 | | - ("David", "Jones", "India"), |
532 | | - ("Michael", "Johnson", "Indonesia"), |
533 | | - ("Chris", "Lee", "Brazil"), |
534 | | - ("Mike", "Brown", "Russia") |
535 | | -).toDF("first_name", "last_name", "country").coalesce(1) |
536 | | - |
537 | | -df.createOrReplaceTempView("person_info") |
538 | | -val requestSchema = StructType(Seq( |
539 | | - StructField("__URL__", StringType), |
540 | | - StructField("__DATA__", StringType), |
541 | | - StructField("__REQUEST_HEADERS__", MapType(StringType, StringType)), |
542 | | - StructField("__REQUEST_PARAMS__", MapType(StringType, StringType)), |
543 | | - StructField("__REQUEST_HIDDEN_PARAMS__", MapType(StringType, StringType)) |
544 | | -)) |
545 | | - |
546 | | -//Constructing the request dataframe by generating necessary input columns from each row in the input dataframe. |
547 | | -val requestRows: Seq[Row] = df.toLocalIterator.asScala.toList.map(row => { |
548 | | - val firstName = row.getAs[String]("first_name") |
549 | | - val lastName = row.getAs[String]("last_name") |
550 | | - val country = row.getAs[String]("country") |
551 | | - val url = s"http://localhost:3000/fireshots/getInfo" |
552 | | - val headers = scala.collection.mutable.Map[String, String]() |
553 | | - headers.put("data", firstName) |
554 | | - val params = scala.collection.mutable.Map[String, String]() |
555 | | - params.put("params", lastName) |
556 | | - val hiddenParams = scala.collection.mutable.Map[String, String]() |
557 | | - hiddenParams.put("hidden_params", country) |
558 | | - |
559 | | - Row(url, |
560 | | - s"""{"first_name" : "$firstName","last_name":"$lastName","country":"$country"} """, |
561 | | - headers, |
562 | | - params, |
563 | | - hiddenParams |
564 | | - ) |
565 | | -}) |
566 | | - |
567 | | -val requestDataframe = spark.createDataFrame(spark.sparkContext.parallelize(requestRows), requestSchema) |
568 | | - |
569 | | - |
570 | | -val responseDf = almaren.builder |
571 | | - .sourceDataFrame(requestDataframe) |
572 | | - .sqlExpr("monotonically_increasing_id() as __ID__", "__DATA__", "__URL__", "__REQUEST_HEADERS__", "__REQUEST_PARAMS__", "__REQUEST_HIDDEN_PARAMS__") |
573 | | - .httpRow(method = "POST") |
574 | | - .batch |
575 | | - |
576 | | -responseDf.show(false) |
577 | | -``` |
578 | | - |
579 | | -#### Parameters |
580 | | - |
581 | | -| Parameter | Description | Type | |
582 | | -|----------------|-------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------| |
583 | | -| headers | HTTP headers | Map[String,String] | |
584 | | -| params | HTTP params | Map[String,String] | |
585 | | -| hiddenParams | HTTP params which are hidden (not exposed in logs) | Map[String,String] | |
586 | | -| method | HTTP Method | String | |
587 | | -| requestHandler | Closure to handle HTTP request | (Row,Session,String,Map[String,String],String) => requests.Respons | |
588 | | -| session | Closure to handle HTTP sessions | () = requests.Session | |
589 | | -| connectTimeout | Timeout in ms to keep the connection keep-alive, it's recommended to keep this number high | Int | |
590 | | -| readTimeout | Maximum number of ms to perform a single HTTP request | Int | |
591 | | -| threadPoolSize | How many connections in parallel for each executor. parallelism = number of excutors * number of cores * threadPoolSize | Int | |
592 | | -| batchSize | How many records a single thread will process | Int | |
593 | | - |
594 | | -| Parameters | Mandatory | Description | Column Type | |
595 | | -|-------------------------------|-----------|------------------------------------------------------------------------------------|--------------------| |
596 | | -| \_\_ID\_\_ | Yes | This field will be in response of http.almaren component, it's useful to join data | String | |
597 | | -| \_\_URL\_\_ | Yes | Used to perform the HTTP request | String | |
598 | | -| \_\_DATA\_\_ | No | Data Content, used in POST/PUT Method HTTP requests | String | |
599 | | -| \_\_REQUEST_HEADERS\_\_ | Yes | HTTP headers | Map[String,String] | |
600 | | -| \_\_REQUEST_PARAMS\_\_ | Yes | HTTP params | Map[String,String] | |
601 | | -| \_\_REQUEST_HIDDEN_PARAMS\_\_ | Yes | HTTP params which are hidden (not exposed in logs) | Map[String,String] | |
602 | | - |
603 | | -| Parameters | Description | |
604 | | -|-------------------------------|------------------------------------------------------------| |
605 | | -| \_\_ID\_\_ | Custom ID , This field will be useful to join data | |
606 | | -| \_\_BODY\_\_ | HTTP response | |
607 | | -| \_\_HEADER\_\_ | HTTP response header | |
608 | | -| \_\_STATUS_CODE\_\_ | HTTP response code | |
609 | | -| \_\_STATUS_MSG\_\_ | HTTP response message | |
610 | | -| \_\_ERROR\_\_ | Java Exception | |
611 | | -| \_\_ELAPSED_TIME\_\_ | Request time in ms | |
612 | | -| \_\_URL\_\_ | HTTP request URL | |
613 | | -| \_\_DATA\_\_ | Data Content, used in POST/PUT Method HTTP requests | |
614 | | -| \_\_REQUEST_HEADERS\_\_ | HTTP Request headers | |
615 | | -| \_\_REQUEST_PARAMS\_\_ | HTTP Request params | |
616 | | -| \_\_REQUEST_HIDDEN_PARAMS\_\_ | HTTP Request params which are hidden (not exposed in logs) | |
617 | | - |
618 | | - |
0 commit comments