Neville Li
Jan 2015
neville@spotify.com
@sinisa_lyh
{
"type": "record",
"name": "Account",
"namespace": "me.lyh.parquet.avro.schema",
"fields": [
{"name": "id", "type": "int"},
{"name": "type", "type": "string"},
{"name": "name", "type": "string"},
{"name": "amount", "type": "float"}
]
}
ParquetAvroSource[Account]("input")
.map(a => (a.getName, a.getAmount))
.group
.reduce(_+_)
// native
pipe.map(a => (a.getName, a.getAmount))
// Parquet
Parquet.project[Account]("name", "amount")
// native
pipe.filter(a => a.getName == "Neville" && a.getAmount > 100)
// Parquet
FilterApi.and(
FilterApi.eq(FilterApi.binaryColumn("name"),
Binary.fromString("Neville")),
FilterApi.gt(FilterApi.floatColumn("amount"),
100f.asInstanceOf[java.lang.Float]) // Java...
)
Like Clojure, but worse
_.getAccounts.get(0).getAmount > 10
Internal
scala.this.Predef.Integer2int(x$1.getAccounts().get(0).getAmount()).>(10)
RAWRRR
Apply(Select(Apply(Select(Select(This(newTypeName("scala")), scala.Predef),
newTermName("Integer2int")), List(Apply(Select(Apply(Select(Apply(Select(
Ident(newTermName("x$1")), newTermName("getAccounts")), List()),
newTermName("get")), List(Literal(Constant(0)))), newTermName("getAmount")),
List()))), newTermName("$greater")), List(Literal(Constant(10))))
Don't worry, there's pattern matching and recursion
import org.apache.avro.Schema
import org.apache.avro.specific.{ SpecificRecord => SR }
object Projection {
def apply[T <: SR](gs: (T => Any)*): Schema = macro applyImpl[T]
def applyImpl[T <: SR : c.WeakTypeTag]
(c: Context)(gs: c.Expr[(T => Any)]*): c.Expr[Schema] = {
// ...
}
Projection[Account](_.getName, _.getAmount)
import _root_.parquet.filter2.predicate.FilterPredicate
import org.apache.avro.specific.{ SpecificRecord => SR }
object Predicate {
def apply[T <: SR](p: T => Boolean): FilterPredicate = macro applyImpl[T]
def applyImpl[T <: SR : c.WeakTypeTag]
(c: Context)
(p: c.Expr[T => Boolean]): c.Expr[FilterPredicate] = {
// ...
}
Predicate[Account](x => x.getName == "Neville" && x.getAmount > 100)
that I have to mimic
(a > 10)
=== (10 < a)
(_.getBool)
=== (_.getBool == true)
https://github.com/nevillelyh/parquet-avro-extra
In production @Spotify