Richard Lefebvre wrote:
Hi,
I have been going through the operations manual, but I can't find the
answer to this: how do I find out which OSS each OST is on?
Yes, that seems like an oversight to me too. I'd like to see lfs have a
new command to make this lookup easier.
If you have access to the servers, you could run something like:
pdsh -w host[1-128] "lctl dl |grep obdfilter"
But if you want to figure it out from just one place, say a client,
that's pretty difficult to piece together from the various commands.
Attached is my "ostmapping" script that you can run on a lustre client.
It gathers information from various places (lfs, lctl, /proc) and lists all oss nids
showing the osts that are currently being exported to the client on
which you run the command. If the nid looks like an IP address, it even
tries to look up the hostname associated with the address.
If anyone has a better way to do this, let me know!
Run "ostmapping -h" to see the options. You can also ask it to just
show the oss and osts for one filesystem, or just the osts for one oss.
Here's a sample of its output:
172.16....@tcp (tycho1-lnet0)
lc1-OST0000_UUID 0 ACTIVE
lc1-OST0008_UUID 8 ACTIVE
lc1-OST0010_UUID 16 ACTIVE
172.16....@tcp (tycho2-lnet0)
lc1-OST0001_UUID 1 ACTIVE
lc1-OST0009_UUID 9 ACTIVE
lc1-OST0011_UUID 17 ACTIVE
172.16....@tcp (tycho3-lnet0)
lc1-OST0002_UUID 2 ACTIVE
lc1-OST000a_UUID 10 ACTIVE
lc1-OST0012_UUID 18 ACTIVE
172.16....@tcp (tycho4-lnet0)
lc1-OST0003_UUID 3 ACTIVE
lc1-OST000b_UUID 11 ACTIVE
lc1-OST0013_UUID 19 ACTIVE
#!/usr/bin/env python
import operator
import os
import re
import socket
import sys
import subprocess
from optparse import OptionParser
# Directory listed by get_fs_uuid_by_name(); each subdirectory is treated as
# one client filesystem instance and is read for its "uuid" file.
PROC_LLITE = '/proc/fs/lustre/llite'
# Directory holding one subdirectory per osc device name; read by
# ostuuid_from_oscuuid() for the "ost_server_uuid" file.
PROC_OSC = '/proc/fs/lustre/osc'
def uuid_from_file(path, filename):
    """Return the first whitespace-separated token of a file's first line.

    Works for files that have uuid and status, like "ost_server_uuid",
    and also for files that just contain the uuid, like "uuid".

    path     -- directory that contains the file
    filename -- name of the file inside that directory

    Raises IOError/OSError if the file cannot be opened, and IndexError if
    the first line is empty or contains only whitespace.
    """
    full_path = os.path.join(path, filename)
    # Use a context manager so the handle is closed right away instead of
    # whenever the garbage collector gets around to it.
    with open(full_path) as uuid_file:
        return uuid_file.readline().split()[0]
def get_fs_uuid_by_name():
    """Build a {filesystem name: filesystem uuid} dictionary.

    Every subdirectory of PROC_LLITE contributes one entry; plain files
    found there are ignored. The uuid is read from the "uuid" file inside
    the subdirectory.
    """
    uuid_by_name = {}
    for entry in os.listdir(PROC_LLITE):
        entry_path = os.path.join(PROC_LLITE, entry)
        if os.path.isdir(entry_path):
            # The filesystem name is assumed to be everything in the
            # directory name that precedes the last "-".
            name = entry.rsplit('-', 1)[0]
            uuid_by_name[name] = uuid_from_file(entry_path, 'uuid')
    return uuid_by_name
def get_fs_name_by_mntpt():
    """Return dictionary of filesystem names. Key is mount point.

    Scans /proc/mounts for entries whose type field is "lustre". The
    filesystem name is the part of the device field that follows the
    first ":/". Lines with fewer than three fields, and lustre entries
    whose device field has no ":/", are skipped rather than raising
    IndexError.

    Raises IOError/OSError if /proc/mounts cannot be opened.
    """
    fs_names = {}
    # The context manager closes /proc/mounts as soon as the scan is done.
    with open('/proc/mounts') as mounts:
        for line in mounts:
            field = line.split()
            if len(field) < 3 or field[2] != 'lustre':
                continue
            device_parts = field[0].split(':/')
            if len(device_parts) < 2:
                # Device is not of the "<something>:/<fsname>" form.
                continue
            fs_names[field[1]] = device_parts[1]
    return fs_names
def path_to_fs_uuid(path):
    """Return the uuid of the lustre filesystem that contains path.

    The path is made absolute and then walked upwards, one directory at a
    time, until a lustre mount point (as listed by get_fs_name_by_mntpt)
    is found.

    Returns None when no ancestor of path is a lustre mount point.
    Raises KeyError if the mount point's filesystem name is not present
    in the table returned by get_fs_uuid_by_name.
    """
    fs_uuid_by_name = get_fs_uuid_by_name()
    fs_name_by_mntpt = get_fs_name_by_mntpt()
    path = os.path.abspath(path)
    # Keep checking dirname of path until the mount point is found.
    while True:
        if path in fs_name_by_mntpt:
            return fs_uuid_by_name[fs_name_by_mntpt[path]]
        parent = os.path.dirname(path)
        # Stop once dirname no longer shortens the path. Comparing with '/'
        # alone is not enough: abspath preserves a leading "//", and
        # dirname('//') is '//', which would loop forever.
        if parent == path:
            return None
        path = parent
def ostuuid_from_oscuuid(osc_name):
    """Return the ost uuid recorded for the osc device named osc_name.

    Reads the first token of PROC_OSC/<osc_name>/ost_server_uuid.
    Raises IOError/OSError if that file cannot be opened.
    """
    return uuid_from_file(os.path.join(PROC_OSC, osc_name), 'ost_server_uuid')
def get_osts_by_fs_uuid():
    """Collect the osts visible on this node, grouped by filesystem uuid.

    Two commands are consulted:
      * "lfs osts" supplies each ost's index, uuid and status;
      * "/usr/sbin/lctl dl -t" lists the osc devices together with the
        filesystem uuid and server nid each one belongs to.

    Returns a dictionary of the form
        {filesystem_uuid: {ost_uuid: [ost_uuid, ost_idx, ost_status, ost_nid]}}
    where all list members are strings.

    Calls sys.exit(3) if either command cannot be executed.
    """
    try:
        # universal_newlines gives text lines on every Python version.
        lfs = subprocess.Popen(['lfs', 'osts'], stdout=subprocess.PIPE,
                               universal_newlines=True)
    except OSError:
        sys.stderr.write('Unable to execute "lfs".\n')
        # Without lfs there is nothing to report; bail out the same way
        # the lctl failure below does instead of hitting a NameError.
        sys.exit(3)
    osts_info_dict = {}
    for line in lfs.stdout:
        args = re.split(r'[\s:]+', line.strip())
        if len(args) != 3:
            # Not an "<index>: <uuid> <status>" line (e.g. a header).
            continue
        ost_idx, ost_uuid, ost_status = args
        osts_info_dict[ost_uuid] = [ost_uuid, ost_idx, ost_status]
    lfs.wait()  # reap the child process

    # Get all of the osts on the system by walking through
    # all of the oscs on the system. The lctl output listing
    # the oscs associates each osc with a filesystem UUID,
    # and then we can find out which ost the osc is associated with
    # from /proc. Keep in mind that each ost can be in use by
    # multiple oscs, so just ignore the duplicates.
    try:
        lctl = subprocess.Popen(['/usr/sbin/lctl', 'dl', '-t'],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        sys.stderr.write('Unable to execute "/usr/sbin/lctl dl -t".\n')
        sys.exit(3)
    osts_by_filesystem = {}
    for line in lctl.stdout:
        field = line.split()
        # Fields 2, 3, 4 and 6 are needed below; skip anything shorter
        # (blank or truncated lines) and every non-osc device.
        if len(field) < 7 or field[2] != 'osc':
            continue
        osc_name = field[3]
        filesystem_uuid = field[4]
        ost_nid = field[6]
        osts_dict = osts_by_filesystem.setdefault(filesystem_uuid, {})
        ost_uuid = ostuuid_from_oscuuid(osc_name)
        if ost_uuid in osts_dict:
            # already found this ost through another osc
            continue
        if ost_uuid not in osts_info_dict:
            # Index and status are unknown, so the ost cannot be listed.
            sys.stderr.write('Warning: %s is not listed by "lfs osts"; '
                             'skipping.\n' % ost_uuid)
            continue
        # Build a fresh list per filesystem so the shared lfs record is
        # not extended again each time the same ost shows up elsewhere.
        osts_dict[ost_uuid] = osts_info_dict[ost_uuid][:3] + [ost_nid]
    lctl.wait()  # reap the child process
    return osts_by_filesystem
def _three_way(x, y):
    """Return -1, 0 or 1 as x is less than, equal to or greater than y."""
    return (x > y) - (x < y)


def nid_sort(nida, nidb):
    """Comparison function that orders nids of the form "address@network".

    Ordering rules, applied in this order:
      1. by the network part (the text after "@"), as strings;
      2. by the number of "."-separated components in the address;
      3. by the first differing component, compared numerically when both
         components are integers and as strings otherwise.

    A nid without "@" is treated as having an empty network part.
    Returns -1, 0 or 1, so it can be used as a cmp-style function (with
    list.sort(cmp=...) on Python 2 or functools.cmp_to_key elsewhere).
    """
    # Pad with '' so that a nid lacking "@" does not raise IndexError.
    addr_a, net_a = (nida.split('@') + [''])[:2]
    addr_b, net_b = (nidb.split('@') + [''])[:2]
    if net_a != net_b:
        return _three_way(net_a, net_b)
    parts_a = addr_a.split('.')
    parts_b = addr_b.split('.')
    if len(parts_a) != len(parts_b):
        return _three_way(len(parts_a), len(parts_b))
    for x, y in zip(parts_a, parts_b):
        if x != y:
            # The first textual difference decides the result, even when
            # the two components are numerically equal ("01" vs "1").
            try:
                return _three_way(int(x), int(y))
            except ValueError:
                # Not numeric (e.g. a hostname): fall back to string order.
                return _three_way(x, y)
    return 0
def print_ost_mapping(ost_mapping):
    """Print every server nid followed by the osts it exports.

    ost_mapping -- dictionary keyed by server nid; each value is a list of
                   ost info lists whose first three members (uuid, index,
                   status) are printed.

    Nids are printed in nid_sort order. For nids on the "tcp" network the
    address is reverse-resolved and the hostname is shown in parentheses;
    "unknown" is shown when the lookup fails.
    """
    try:
        from functools import cmp_to_key
    except ImportError:
        # Python older than 2.7: sorted() still accepts cmp= directly.
        sorted_nids = sorted(ost_mapping, cmp=nid_sort)
    else:
        sorted_nids = sorted(ost_mapping, key=cmp_to_key(nid_sort))
    for server_nid in sorted_nids:
        # partition() tolerates a nid without "@" (domain is then '').
        addr, _, domain = server_nid.partition('@')
        if domain == 'tcp':
            try:
                hostname = socket.gethostbyaddr(addr)[0]
            except (socket.error, UnicodeError):
                # Only lookup failures are best-effort; Ctrl-C during a
                # slow DNS query must still interrupt the program.
                hostname = 'unknown'
            print('%s (%s)' % (server_nid, hostname))
        else:
            print(server_nid)
        for ost in ost_mapping[server_nid]:
            print(' %-20s %-5s %s' % (ost[0], ost[1], ost[2]))
def main():
    """Command-line entry point; returns the process exit status.

    Options:
      -d/--directory DIR  only report the filesystem that contains DIR
      -s/--server NID     only report the osts of the server with this nid

    Returns 0 on success, 2 when no lustre filesystem is found or the
    requested server nid is unknown, and 3 when DIR is not on lustre.
    """
    parser = OptionParser()
    parser.add_option('-d', '--directory', dest='dir', metavar='DIR',
                      help='path to a lustre filesystem')
    parser.add_option('-s', '--server', dest='server_nid', metavar='NID',
                      help='list the osts on a single server')
    opts, _ = parser.parse_args()

    osts_by_fs_uuid = get_osts_by_fs_uuid()
    if not osts_by_fs_uuid:
        print('No lustre filesystems mounted on this node?')
        return 2

    # Decide which filesystems to report on.
    if opts.dir:
        wanted_uuid = path_to_fs_uuid(opts.dir)
        if not wanted_uuid:
            print('"' + opts.dir + '" is not a lustre filesystem')
            return 3
        selected_uuids = [wanted_uuid]
    else:
        selected_uuids = list(osts_by_fs_uuid.keys())

    # Group the ost info lists by the nid of the oss that serves them
    # (the nid is the fourth member of each info list).
    osts_by_nid = {}
    for uuid in selected_uuids:
        for ost_info in osts_by_fs_uuid[uuid].values():
            osts_by_nid.setdefault(ost_info[3], []).append(ost_info)

    # Within each server, order the osts by numeric index.
    for nid in osts_by_nid:
        osts_by_nid[nid].sort(key=lambda info: int(info[1]))

    # Optionally narrow the report down to a single server.
    wanted_nid = opts.server_nid
    if wanted_nid:
        if wanted_nid not in osts_by_nid:
            print('Server NID not found')
            return 2
        osts_by_nid = {wanted_nid: osts_by_nid[wanted_nid]}

    print_ost_mapping(osts_by_nid)
    return 0
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
_______________________________________________
Lustre-discuss mailing list
[email protected]
http://lists.lustre.org/mailman/listinfo/lustre-discuss