1 、摘要

本文主要讲解:根据GPS数据,找出公交车乘客的上车站点经纬度。根据用户上车的坐标找到离用户最近的公交车站点坐标,近似推算用户上车的站点名,再将上车和下车站点对应起来,即得到用户的OD(出行起讫点)。
主要思路:

  1. 按照乘客刷卡对应的POS卡号,找到相对应的车辆定位和时间
  2. 使用二分法根据用户的刷卡时间找到最近的公交车定位时间,从而对应用户的上车坐标
  3. 根据乘客乘坐的线路去找到线路名称对应的站点和经纬度
  4. 按照乘客刷卡对应的POS卡号 ,找到相对应的线路名
  5. 根据用户所乘线路,找到离用户上车坐标最近的站点
  6. 根据用户上车的坐标找到离用户最近的公交车站点坐标,近似推算用户上车的站点
  7. 使用时间匹配思路找出用户的下车站点,也可使用隐马尔可夫算法推算下车站点,具体请参考文末链接。

2、数据介绍

原始GPS数据如下:

最后的成果截图如下:

线路站点名如下:

上车站点及经纬度如下:

3、相关技术

二分法查找最近的时间思路如下:

(1)首先,从数组的中间元素开始搜索,如果该元素正好是目标元素 ,则搜索过程结束,否则执行下一步 。

(2)如果目标元素大于/小于中间元素,则在数组大于/小于中间元素的那一半区域查找 ,然后重复步骤(1)的操作。

(3)如果某一步数组为空,则表示找不到目标元素。

def str_search(li, card_car_date_time):
    """Return the timestamp string in ``li`` closest to ``card_car_date_time``.

    Args:
        li: Timestamp strings in ``%Y/%m/%d %H:%M:%S`` format, sorted in
            ascending chronological order.
        card_car_date_time: ``datetime`` of the card swipe to match.

    Returns:
        The element of ``li`` whose parsed time is nearest to the swipe time
        (the earlier one on a tie), or ``None`` when ``li`` is empty.
    """
    size = len(li)
    if size == 0:
        return None
    time_format = "%Y/%m/%d %H:%M:%S"
    # Binary search for the first entry that is not earlier than the swipe.
    low, high = 0, size
    while low < high:
        mid = (low + high) // 2
        if datetime.strptime(li[mid], time_format) < card_car_date_time:
            low = mid + 1
        else:
            high = mid
    # The swipe lies outside the recorded range: clamp to the nearest end.
    if low == 0:
        return li[0]
    if low == size:
        return li[-1]
    # Otherwise the nearest entry is one of the two around the swipe time;
    # an exact match has a zero gap on the "later" side and is returned.
    earlier = datetime.strptime(li[low - 1], time_format)
    later = datetime.strptime(li[low], time_format)
    if card_car_date_time - earlier <= later - card_car_date_time:
        return li[low - 1]
    return li[low]

4 、完整代码

主运行程序入口
找出用户的上车站点经纬度
bus_up_station.py

import os
from datetime import datetime

import pandas as pd

from my_utils.read_write import writeCsv

'''
    此文件用于找出用户的上车站点经纬度
'''

# Switch to the hard-coded Windows data directory so that the relative file
# names used in this script resolve against it.
os.chdir(r'D:\项目\公交数据处理\SQL\\')


def str_search(li, card_car_date_time):
    """Return the timestamp string in ``li`` closest to ``card_car_date_time``.

    Args:
        li: Timestamp strings in ``%Y/%m/%d %H:%M:%S`` format, sorted in
            ascending chronological order.
        card_car_date_time: ``datetime`` of the card swipe to match.

    Returns:
        The element of ``li`` whose parsed time is nearest to the swipe time
        (the earlier one on a tie), or ``None`` when ``li`` is empty.
    """
    size = len(li)
    if size == 0:
        return None
    time_format = "%Y/%m/%d %H:%M:%S"
    # Binary search for the first entry that is not earlier than the swipe.
    low, high = 0, size
    while low < high:
        mid = (low + high) // 2
        if datetime.strptime(li[mid], time_format) < card_car_date_time:
            low = mid + 1
        else:
            high = mid
    # The swipe lies outside the recorded range: clamp to the nearest end.
    if low == 0:
        return li[0]
    if low == size:
        return li[-1]
    # Otherwise the nearest entry is one of the two around the swipe time;
    # an exact match has a zero gap on the "later" side and is returned.
    earlier = datetime.strptime(li[low - 1], time_format)
    later = datetime.strptime(li[low], time_format)
    if card_car_date_time - earlier <= later - card_car_date_time:
        return li[low - 1]
    return li[low]


def compare_time_find_station(card_terminal, plate_geo):
    """Attach a boarding coordinate to one card-swipe record.

    The swipe time is matched against the GPS times of the same vehicle via
    ``str_search``; the coordinate of the matched GPS row is taken as the
    place where the passenger boarded.

    Args:
        card_terminal: One swipe row (named tuple from ``itertuples``) with
            fields user_id, routeid, plateid, date, time, gpstime.
        plate_geo: GPS rows of that vehicle; columns are expected in the
            order plateid, x, y, speed, gpstime because x and y are read
            by position.

    Returns:
        ``[user_id, routeid, plateid, gpstime, x, y]``, or an empty list when
        ``str_search`` yields no time.
    """
    swipe_text = card_terminal.gpstime
    swipe_time = datetime.strptime(swipe_text, "%Y/%m/%d %H:%M:%S")
    ordered_times = plate_geo['gpstime'].sort_values().tolist()
    matched_time = str_search(ordered_times, swipe_time)
    if not matched_time:
        return []
    matched_rows = plate_geo[plate_geo['gpstime'] == matched_time]
    # NOTE(review): the time stored in the result is the swipe record's own
    # gpstime, not the matched GPS time -- confirm which one is intended.
    return [
        card_terminal.user_id,
        card_terminal.routeid,
        card_terminal.plateid,
        swipe_text,
        matched_rows.iat[0, 1],
        matched_rows.iat[0, 2],
    ]


def loop():
    """Match every swipe in ``user_plate`` to a GPS row and save the results.

    Reads the module-level ``user_plate`` and ``GPS`` tables. Swipes whose
    vehicle has no GPS rows, or for which no match is produced, are skipped.
    The collected rows are handed to ``writeCsv`` together with the file
    name '乘客上车经纬度.csv'.
    """
    matched_rows = []
    for swipe in user_plate.itertuples():
        # GPS rows recorded for the vehicle this swipe happened on.
        vehicle_rows = GPS[GPS['plateid'] == swipe.plateid]
        if vehicle_rows.empty:
            continue
        # Nearest GPS time to the swipe gives the boarding coordinate.
        boarding = compare_time_find_station(swipe, vehicle_rows)
        if boarding:
            matched_rows.append(boarding)
    writeCsv(matched_rows, '乘客上车经纬度.csv')


if __name__ == "__main__":
    # Both input sheets are read from the same workbook.
    workbook_path = 'gps_Data.xlsx'
    # Vehicle GPS rows; loop() reads this module-level name.
    GPS = pd.read_excel(workbook_path, sheet_name='_GPS20190426')
    # Card-swipe rows; loop() reads this module-level name as well.
    user_plate = pd.read_excel(workbook_path, sheet_name='查询')
    loop()

find_station_name.py
找到站点名

import os

import pandas as pd

from my_utils.calculateDistance import getDistance
from my_utils.read_write import writeOneCsv

'''
    此文件用于找出用户的上车站点名
'''

# Switch to the hard-coded Windows data directory so that the relative file
# names used in this script resolve against it.
os.chdir(r'D:\项目\公交数据处理\SQL\\')


def compare_geo_find_station(line_station_geo, up_station):
    """Pick the stop on the route that is closest to a boarding coordinate.

    Args:
        line_station_geo: Stops of one route, with columns
            routename, stopname, x, y.
        up_station: One boarding row with fields
            user_id, routeid, plateid, gpstime, x, y.

    Returns:
        ``[user_id, routeid, plateid, gpstime, routename, stopname,
        stop_x, stop_y, distance]`` for the nearest stop, or an empty list
        when no stop is strictly closer than the 0.5 cut-off (expressed in
        whatever unit ``getDistance`` returns).
    """
    board_x = up_station.x
    board_y = up_station.y
    nearest_stop = None
    # Starting value doubles as the cut-off: farther stops never qualify.
    shortest = 0.5
    for stop in line_station_geo.itertuples():
        gap = getDistance(stop.x, stop.y, board_x, board_y)
        if gap < shortest:
            shortest = gap
            nearest_stop = stop
    if nearest_stop is None:
        return []
    return [
        up_station.user_id,
        up_station.routeid,
        up_station.plateid,
        up_station.gpstime,
        nearest_stop.routename,
        nearest_stop.stopname,
        nearest_stop.x,
        nearest_stop.y,
        shortest,
    ]


def find_line_stations(routeid):
    """Return the rows of ``Route`` that belong to the line of ``routeid``.

    The line name is built by dropping the first three characters of
    ``str(routeid)`` and appending '路'; it is then matched against the
    ``routename`` column of the module-level ``Route`` table.
    """
    line_number = str(routeid)[3:]
    return Route[Route['routename'] == line_number + '路']


def loop():
    """Resolve a boarding stop name for every row of ``up_stations``.

    For each boarding row the stops of its route are looked up with
    ``find_line_stations``; when the route is known and
    ``compare_geo_find_station`` returns a non-empty row, that row is passed
    to ``writeOneCsv`` with the file name 'up_station_name.csv'.
    """
    for boarding in up_stations.itertuples():
        # Stops (with coordinates) of the route the passenger rode.
        candidate_stops = find_line_stations(boarding.routeid)
        if candidate_stops.empty:
            continue
        # Nearest stop on that route to the boarding coordinate.
        named_row = compare_geo_find_station(candidate_stops, boarding)
        if named_row:
            writeOneCsv(named_row, 'up_station_name.csv')


if __name__ == "__main__":
    workbook_path = 'gps_Data.xlsx'
    # Route stops with coordinates (columns: routename, stopname, x, y);
    # find_line_stations() reads this module-level name.
    Route = pd.read_excel(workbook_path, sheet_name='Route')
    # Boarding coordinates CSV; loop() reads the resulting module-level
    # up_stations table.
    boarding_csv = '乘客上车经纬度.csv'
    up_stations = pd.read_csv(boarding_csv, encoding='gbk', engine='python')
    loop()

on_out_station.py
计算襄阳公交车OD

# -*- coding: utf-8 -*-
import os
from datetime import datetime

import pandas as pd

from my_utils.calculateDistance import getDistance
from my_utils.read_write import writeOneCsv

'''
    此文件用于找出用户的上车下车站点名,即用户od
'''
# Switch to the hard-coded Windows data directory so that the relative file
# names used in this script resolve against it.
os.chdir(r'D:\项目\公交数据处理\SQL\\')


def minNums(startTime, endTime):
    """Return the whole minutes elapsed from ``startTime`` to ``endTime``.

    The difference is converted to seconds, divided by 60 and truncated
    towards zero by ``int``.
    """
    elapsed = endTime - startTime
    return int(elapsed.total_seconds() / 60)

def on_out_bus(user_id, group):
    """Pair each boarding of one passenger with an alighting stop.

    The rows are sorted by ``gpstime``; the boarding stop of the following
    row is treated as the alighting stop of the current row, and the last
    row is paired with the first one. A pair is written only when the two
    stops differ and the alighting time is later than the boarding time.

    Columns are read by position and are expected in the order
    user_id, routeid, plateid, gpstime, routename, stopname, x, y, distance.

    Args:
        user_id: Passenger identifier, copied into every output row.
        group: That passenger's boarding rows.

    Each accepted pair is passed to ``writeOneCsv`` with the file name
    'up_down_stations.csv' as
    ``[routename, user_id, routeid, board_stop, board_x, board_y,
    board_time, alight_stop, alight_x, alight_y, alight_time, minutes,
    distance]``.
    """
    trips = group.sort_values('gpstime')
    trip_count = trips.shape[0]
    for pos in range(trip_count):
        route_name = trips.iat[pos, 4]
        route_id = trips.iat[pos, 1]
        board_time = trips.iat[pos, 3]
        board_stop = trips.iat[pos, 5]
        board_lon = trips.iat[pos, 6]
        board_lat = trips.iat[pos, 7]

        # A single boarding has nothing to be paired with.
        if trip_count == 1:
            continue

        # Next row by default; the last row wraps around to the first.
        # NOTE(review): rows are sorted ascending, so the first row's time
        # cannot exceed the last row's and the wrap-around pair never
        # passes the time check below -- confirm whether that is intended.
        following = pos + 1 if pos + 1 < trip_count else 0
        alight_time = trips.iat[following, 3]
        alight_stop = trips.iat[following, 5]
        if not (board_stop != alight_stop and alight_time > board_time):
            continue

        alight_lon = trips.iat[following, 6]
        alight_lat = trips.iat[following, 7]
        # Travel time in whole minutes, then the stop-to-stop distance.
        ride_minutes = minNums(board_time, alight_time)
        leg_distance = getDistance(float(board_lon), float(board_lat),
                                   float(alight_lon), float(alight_lat))
        record = [
            route_name,
            user_id,
            route_id,
            board_stop,
            board_lon,
            board_lat,
            board_time,
            alight_stop,
            alight_lon,
            alight_lat,
            alight_time,
            ride_minutes,
            round(leg_distance, 4),
        ]
        writeOneCsv(record, 'up_down_stations.csv')


def get_bus_station():
    """Run ``on_out_bus`` for every passenger in ``grouped_upstation``.

    ``grouped_upstation`` is a module-level grouping that yields
    ``(user_id, rows)`` pairs.
    """
    for passenger_id, passenger_rows in grouped_upstation:
        on_out_bus(passenger_id, passenger_rows)


if __name__ == '__main__':
    named_csv = 'up_station_name.csv'
    up_station = pd.read_csv(named_csv, engine='python', dtype='str', sep=',')
    # Parse the time strings once so rows can be sorted and subtracted later.
    up_station['gpstime'] = up_station['gpstime'].map(
        lambda text: datetime.strptime(text, '%Y/%m/%d %H:%M:%S'))
    up_station['date'] = up_station['gpstime'].map(lambda stamp: stamp.date())
    # Handle one calendar day at a time; get_bus_station() reads the
    # module-level grouped_upstation assigned here.
    for day, day_rows in up_station.groupby('date'):
        grouped_upstation = day_rows.groupby("user_id")
        get_bus_station()

5、参考链接

一种公交乘客出行站点预测方法、存储介质及服务器

python_计算深圳公交车出行OD

本文版权归趣快排SEO www.SeogurUblog.com 所有,如有转发请注明出处,竞价开户托管,seo优化请联系QQ▶61910465