taxi

package

v0.8.0 Latest Go to latest Published: Feb 14, 2018 License: BSD-3-Clause Imports: 16 Imported by: 4

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/pilosa/pdk

Links

Open Source Insights

README ¶

Inspiration

Pilosa queries

frames:

cabType
dist_miles
dropDay
dropGridID
dropMonth
dropTime
duration_minutes
pickupDay
pickupGridID
pickupMonth
pickupTime
speed_mph

also need passenger_count

1. count per cab_type
   tick ; q=""; for i in {0..10} ; do q="${q}Count(Bitmap(id=$i, frame=cabType))" ; done ; curl localhost:15000/query?db=taxi -d "$q" ; tock
2. avg(total_amount) per passenger_count
   a. need a Sum() that works on attributes
   b. use binary representation of cents to get a Sum?
3. count per (passenger_count, year)
   loop over x, y: count(intersect(x, y))
4. count per (passenger_count, year, round(trip_distance)) order by (year, count)
   loop over x, y, z... then do the ordering externally

Mark queries

Mark uses four queries; I'm not sure whether there is a place where he lays out exactly what they are.

bigquery

bq query "SELECT cab_type,
                 count(*)
          FROM [taxis-1273:trips.taxi_trips]
          GROUP BY cab_type;"

bq query "SELECT passenger_count,
                 avg(total_amount)
          FROM [taxis-1273:trips.taxi_trips]
          GROUP BY passenger_count;"

bq query "SELECT passenger_count,
                 year(pickup_datetime),
                 count(*)
          FROM [taxis-1273:trips.taxi_trips]
          GROUP BY 1, 2;"

bq query "SELECT passenger_count,
                 year(pickup_datetime),
                 round(trip_distance),
                 count(*)
          FROM [taxis-1273:trips.taxi_trips]
          GROUP BY 1, 2, 3
          ORDER BY 2, 4 desc;"

elasticsearch

SELECT cab_type,
       count(*)
FROM trips
GROUP BY cab_type

SELECT passenger_count,
       avg(total_amount)
FROM trips
GROUP BY passenger_count

SELECT passenger_count,
       count(*) trips
FROM trips
GROUP BY passenger_count,
         date_histogram(field='pickup_datetime',
                              'interval'='year',
                              'alias'='year')

postgres sanity check

setup

create database taxi;

\connect taxi;

create table rides (id int unique, cab_type int, passenger_count int, total_amount float, pickup_datetime timestamp, trip_distance float);

insert into rides (id, cab_type, passenger_count, total_amount, pickup_datetime, trip_distance) values (0, 1, 1, 20.00, '2017-02-10 06:00:00', 2);
insert into rides (id, cab_type, passenger_count, total_amount, pickup_datetime, trip_distance) values (1, 1, 1, 10.00, '2017-02-10 07:00:00', 3);
insert into rides (id, cab_type, passenger_count, total_amount, pickup_datetime, trip_distance) values (2, 1, 2, 15.00, '2017-02-10 08:00:00', 2.5);
insert into rides (id, cab_type, passenger_count, total_amount, pickup_datetime, trip_distance) values (3, 2, 1, 12.00, '2017-02-10 09:00:00', 1);
insert into rides (id, cab_type, passenger_count, total_amount, pickup_datetime, trip_distance) values (4, 2, 2, 24.00, '2017-02-10 10:00:00', 4);

queries

select cab_type, count(*) from rides group by cab_type;

 cab_type | count
----------+-------
        1 |     3
        2 |     2

select passenger_count, avg(total_amount) from rides group by passenger_count;

 passenger_count | avg
-----------------+------
               1 |   14
               2 | 19.5

select passenger_count, extract(year from pickup_datetime), count(*) from rides group by 1, 2;

 passenger_count | date_part | count
-----------------+-----------+-------
               2 |      2017 |     2
               1 |      2017 |     3

select passenger_count, extract(year from pickup_datetime), round(trip_distance), count(*) from rides group by 1, 2, 3 order by 2, 4 desc;

 passenger_count | date_part | round | count
-----------------+-----------+-------+-------
               2 |      2017 |     4 |     1
               2 |      2017 |     2 |     1
               1 |      2017 |     2 |     1
               1 |      2017 |     3 |     1
               1 |      2017 |     1 |     1

Documentation ¶

Index ¶

type BitFrame
type Counter
- func (c *Counter) Add(n int)
- func (c *Counter) Get() (ret int64)
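type Main
- func NewMain() *Main
- func (m *Main) AddBytes(n int)
- func (m *Main) BytesProcessed() (num int64)
- func (m *Main) Run() error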
type Nexter
- func (n *Nexter) Last() (lastID uint64)
- func (n *Nexter) Next() (nextID uint64)
type Record
- func (r Record) Clean() ([]string, bool)

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type BitFrame ¶

type BitFrame struct {
	Bit   uint64
	Frame string
}

type Counter ¶

type Counter struct {
	// contains filtered or unexported fields
}

func (*Counter) Add ¶

func (c *Counter) Add(n int)

func (*Counter) Get ¶

func (c *Counter) Get() (ret int64)

type Main ¶

type Main struct {
	PilosaHost       string
	URLFile          string
	FetchConcurrency int
	Concurrency      int
	Index            string
	BufferSize       int
	UseReadAll       bool
	// contains filtered or unexported fields
}

func NewMain ¶

func NewMain() *Main

func (*Main) AddBytes ¶

func (m *Main) AddBytes(n int)

func (*Main) BytesProcessed ¶

func (m *Main) BytesProcessed() (num int64)

func (*Main) Run ¶

func (m *Main) Run() error

type Nexter ¶

type Nexter struct {
	// contains filtered or unexported fields
}

Nexter generates unique sequential ids in a threadsafe way.

func (*Nexter) Last ¶

func (n *Nexter) Last() (lastID uint64)

func (*Nexter) Next ¶

func (n *Nexter) Next() (nextID uint64)

Next generates a new id

type Record ¶

type Record struct {
	Type rune
	Val  string
}

func (Record) Clean ¶

func (r Record) Clean() ([]string, bool)

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL