In [0]:
#@title Install gspread
# IPython shell escape: quietly (-q) install or upgrade the gspread package,
# which the Google Sheets import cell below imports.
!pip install --upgrade -q gspread

In [0]:
#@title Import Strava activities from Google Sheets

import gspread
import pandas as pd
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate the Colab user before asking for the application-default
# credentials that gspread is authorized with.
auth.authenticate_user()
credentials = GoogleCredentials.get_application_default()
gc = gspread.authorize(credentials)

# First sheet of the spreadsheet named 'Strava'.
worksheet = gc.open('Strava').sheet1

# get_all_values gives a list of rows: the first row supplies the column
# names, every following row becomes one record of the DataFrame.
rows = worksheet.get_all_values()
header_row, data_rows = rows[0], rows[1:]
activities_raw = pd.DataFrame.from_records(data_rows, columns=header_row)

In [0]:
#@title Index activities, convert data types, convert units

# Build `activities` from a projection of the raw sheet in one chain, so
# activities_raw itself is never modified:
#   * distance / elapsed_time: cast to float, then scaled by 1/1000 and 1/60
#     (they are printed later with 'km' and 'min' suffixes; the source units
#     are presumably metres and seconds -- confirm against the sheet).
#   * commute: 1 where the cell is exactly the string 'TRUE', otherwise 0.
#   * date: parsed to datetimes, then used with `type` as the row index.
activities = (
    activities_raw[['date', 'type', 'distance', 'elapsed_time', 'commute', 'name']]
        .assign(
            distance=lambda frame: frame['distance'].astype(float) / 1000,
            elapsed_time=lambda frame: frame['elapsed_time'].astype(float) / 60,
            commute=lambda frame: frame['commute'].apply(
                lambda flag: int(flag == 'TRUE')),
            date=lambda frame: pd.to_datetime(frame['date']))
        .set_index(['date', 'type']))

In [0]:
#@title Summarize by day and activity type

# Sum the activity columns per (calendar day, type). `type` is then moved out
# of the index temporarily so the remaining date index can be converted to
# daily periods, after which the (date, type) index is restored.
activities_by_day = (
    activities
        .groupby(by=[pd.Grouper(freq='D', level=0), 'type'])
        .sum()
        .reset_index(level='type')
        .to_period('D')
        .reset_index()
        .set_index(['date', 'type']))

# Expand to the full grid of every day between the first and last recorded day
# crossed with every activity type; combinations that had no rows are filled
# with 0. The resulting index levels are named 'day' and 'type'.
first_day = activities_by_day.index.levels[0].min()
last_day = activities_by_day.index.levels[0].max()
days = pd.period_range(start=first_day, end=last_day, freq='D')
types = activities_by_day.index.levels[1].unique()
index = pd.MultiIndex.from_product(iterables=[days, types], names=['day', 'type'])
activities_by_day = activities_by_day.reindex(index, fill_value=0)

In [0]:
#@title Print a few random activities

# Five rows drawn at random (no random_state is passed to sample), listed in
# index order, with distance and elapsed time rendered as whole numbers.
print(
    activities
      .loc[:, ['distance', 'elapsed_time']]
      .sample(n=5)
      .sort_index()
      .to_string(formatters=dict(
          distance='{:.0f}km'.format,
          elapsed_time=' {:.0f}min'.format)))

In [0]:
#@title Print totals

# Per activity type: number of distance entries and their summed distance,
# ordered from the highest count to the lowest.
print(
    activities
      .groupby(level='type')['distance']
      .agg(['count', 'sum'])
      .sort_values(by='count', ascending=False)
      .to_string(formatters={'sum': ' {:.0f}km'.format}))

In [0]:
#@title Show date range

# Earliest and latest values in the date level of the activities index.
recorded_dates = activities.index.levels[0]
first_date, last_date = recorded_dates.min(), recorded_dates.max()

# `.days` is the whole-day component of the difference between the two dates.
span_in_days = (last_date - first_date).days
print('Activities recorded from %s to %s, total of %d days.' % (
    first_date.strftime('%Y-%m-%d'),
    last_date.strftime('%Y-%m-%d'),
    span_in_days))