import pandas as pd
import numpy as np
!head pvcs.csv
NAME,STATUS,VOLUME,CAPACITY,ACCESSMODES,AGE
claim-redacted-205,Bound,pvc-5643aadf-ddb3-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-1088,Bound,pvc-bc68d752-ecba-11e6-98ef-42010af000c3,10Gi,RWO,2d
claim-redacted-389,Bound,pvc-5d67d43e-ddcb-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-119,Bound,pvc-73c5d644-ddb1-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-64,Bound,pvc-3c2345e4-dd9f-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-192,Bound,pvc-51b672a8-ddb3-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-814,Bound,pvc-2b83ae02-dee0-11e6-98ef-42010af000c3,10Gi,RWO,20d
claim-redacted-775,Bound,pvc-ec1d47be-deb0-11e6-98ef-42010af000c3,10Gi,RWO,20d
claim-redacted-954,Bound,pvc-5a0acdc5-e3ee-11e6-98ef-42010af000c3,10Gi,RWO,13d
= pd.DataFrame.from_csv('pvcs.csv')
df df.head()
STATUS | VOLUME | CAPACITY | ACCESSMODES | AGE | |
---|---|---|---|---|---|
NAME | |||||
claim-redacted-205 | Bound | pvc-5643aadf-ddb3-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d |
claim-redacted-1088 | Bound | pvc-bc68d752-ecba-11e6-98ef-42010af000c3 | 10Gi | RWO | 2d |
claim-redacted-389 | Bound | pvc-5d67d43e-ddcb-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d |
claim-redacted-119 | Bound | pvc-73c5d644-ddb1-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d |
claim-redacted-64 | Bound | pvc-3c2345e4-dd9f-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d |
'username'] = np.array(df.index.str.extract('\w+-([\w-]+)-\d+$'))
df['id'] = np.array(df.index.str.extract('\w+-\w+-(\d+)$'))
df[ df.head()
/Users/redacted/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: currently extract(expand=None) means expand=False (return Index/Series/DataFrame) but in a future version of pandas this will be changed to expand=True (return DataFrame)
if __name__ == '__main__':
/Users/redacted/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: currently extract(expand=None) means expand=False (return Index/Series/DataFrame) but in a future version of pandas this will be changed to expand=True (return DataFrame)
from ipykernel import kernelapp as app
STATUS | VOLUME | CAPACITY | ACCESSMODES | AGE | username | id | ||
---|---|---|---|---|---|---|---|---|
NAME | ||||||||
claim-redacted-205 | Bound | pvc-5643aadf-ddb3-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 205 | |
claim-redacted-1088 | Bound | pvc-bc68d752-ecba-11e6-98ef-42010af000c3 | 10Gi | RWO | 2d | redacted | 1088 | |
claim-redacted-389 | Bound | pvc-5d67d43e-ddcb-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 389 | |
claim-redacted-119 | Bound | pvc-73c5d644-ddb1-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 119 | |
claim-redacted-64 | Bound | pvc-3c2345e4-dd9f-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 64 |
df.tail()
STATUS | VOLUME | CAPACITY | ACCESSMODES | AGE | username | id | |
---|---|---|---|---|---|---|---|
NAME | |||||||
claim-redacted-562 | Bound | pvc-4418d1d6-de0a-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 562 |
claim-redacted-400 | Bound | pvc-2399fe54-ddcf-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 400 |
claim-redacted-946 | Bound | pvc-294cb2c7-e35e-11e6-98ef-42010af000c3 | 10Gi | RWO | 14d | redacted | 946 |
claim-redacted-900 | Bound | pvc-f6859607-e1c0-11e6-98ef-42010af000c3 | 10Gi | RWO | 16d | redacted | 900 |
hub-db-dir | Bound | pvc-ea8a0bef-dd44-11e6-98ef-42010af000c3 | 10Gi | RWO | 22d | NaN | NaN |
= df[~df['AGE'].str.contains('m')].dropna()
valids = valids[valids['id'] != '14']
valids valids.head()
STATUS | VOLUME | CAPACITY | ACCESSMODES | AGE | username | id | |
---|---|---|---|---|---|---|---|
NAME | |||||||
claim-redacted-205 | Bound | pvc-5643aadf-ddb3-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 205 |
claim-redacted-1088 | Bound | pvc-bc68d752-ecba-11e6-98ef-42010af000c3 | 10Gi | RWO | 2d | redacted | 1088 |
claim-redacted-389 | Bound | pvc-5d67d43e-ddcb-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 389 |
claim-redacted-119 | Bound | pvc-73c5d644-ddb1-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 119 |
claim-redacted-64 | Bound | pvc-3c2345e4-dd9f-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 64 |
'username'].head().value_counts() valids[
redacted 1
redacted 1
redacted 1
redacted 1
redacted 1
Name: username, dtype: int64
'id'].value_counts().head() valids[
400 1
90 1
756 1
488 1
1029 1
Name: id, dtype: int64
len(valids['id'])
1105
import sqlite3
= sqlite3.connect('jupyterhub.sqlite')
conn = conn.cursor() c
'PRAGMA TABLE_INFO({})'.format('users'))
c.execute(
c.fetchall()
[(0, 'id', 'INTEGER', 1, None, 1),
(1, 'name', 'VARCHAR(1023)', 0, None, 0),
(2, '_server_id', 'INTEGER', 0, None, 0),
(3, 'admin', 'BOOLEAN', 0, None, 0),
(4, 'last_activity', 'DATETIME', 0, None, 0),
(5, 'cookie_id', 'VARCHAR(1023)', 0, None, 0),
(6, 'state', 'TEXT', 0, None, 0),
(7, 'auth_state', 'TEXT', 0, None, 0)]
'SELECT * FROM users LIMIT 20')
c.execute(= c.fetchall()
current_users 2] current_users[:
[(1,
'redacted',
None,
1,
'2017-02-09 09:07:03.936620',
'8ec09f8def774b668e838d1e70e00329',
None,
None),
(2,
'redacted',
None,
1,
'2017-02-09 09:07:03.938610',
'c98f59afc1fe4a70b18b451ac1190d45',
None,
None)]
= {user[1] for user in current_users if user[3] == 1}
admins admins
{'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted'}
# I just did this manually in the sqlite3 CLI
# c.execute('DROP FROM users where id > -1')
# c.fetchall()
--------------------------------------------------------------------------- OperationalError Traceback (most recent call last) <ipython-input-73-a8eb9e4f2c5d> in <module>() ----> 1 c.execute('DROP FROM users where id > -1') 2 c.fetchall() OperationalError: near "FROM": syntax error
import datetime
import itertools
= list(zip(
records 'id'].astype(int),
valids['username'],
valids['NULL'),
itertools.repeat(1 if name in admins else 0 for name in valids['username']],
[# itertools.repeat('NULL'), # Pick a random valid value
'2017-02-09 09:07:03.936620'), # Pick a random valid value
itertools.repeat('thisisadummycookiehopefullyitworks'),
itertools.repeat('NULL'),
itertools.repeat('NULL')
itertools.repeat(
))3] records[:
[(205,
'redacted',
'NULL',
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
'NULL',
'NULL'),
(1088,
'redacted',
'NULL',
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
'NULL',
'NULL'),
(389,
'redacted',
'NULL',
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
'NULL',
'NULL')]
# I hate everything
def record_to_sql(record):
return "INSERT INTO users VALUES({}, '{}', {}, '{}', '{}', '{}', {}, {})".format(
*record
)
= [record_to_sql(record) for record in records]
sql_statements 0]) record_to_sql(records[
"INSERT INTO users VALUES(205, 'redacted', NULL, '0', '2017-02-09 09:07:03.936620', 'thisisadummycookiehopefullyitworks', NULL, NULL)"
[(0, 'id', 'INTEGER', 1, None, 1),
(1, 'name', 'VARCHAR(1023)', 0, None, 0),
(2, '_server_id', 'INTEGER', 0, None, 0),
(3, 'admin', 'BOOLEAN', 0, None, 0),
(4, 'last_activity', 'DATETIME', 0, None, 0),
(5, 'cookie_id', 'VARCHAR(1023)', 0, None, 0),
(6, 'state', 'TEXT', 0, None, 0),
(7, 'auth_state', 'TEXT', 0, None, 0)]
with conn:
= conn.cursor()
c for statement in sql_statements:
c.execute(statement)
'SELECT * FROM users LIMIT 20')
c.execute( c.fetchall()
[(2,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(3,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(4,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(5,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(6,
'redacted',
None,
1,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(7,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(8,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(9,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(10,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(11,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(12,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(13,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(15,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(17,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(18,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(19,
'redacted',
None,
1,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(20,
'redacted',
None,
1,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(21,
'redacted',
None,
1,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(22,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(24,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None)]